[med-svn] [manta] 01/02: New upstream version 1.0.2+dfsg

Andreas Tille tille at debian.org
Tue Nov 15 13:36:57 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository manta.

commit bbe60d413c2526f9c91d6e3cc48b465081190f9e
Author: Andreas Tille <tille at debian.org>
Date:   Tue Nov 15 14:35:14 2016 +0100

    New upstream version 1.0.2+dfsg
---
 CMakeLists.txt                                     |  213 ++
 COPYRIGHT.txt                                      |  242 +++
 ChangeLog.txt                                      |  288 +++
 LICENSE.txt                                        |  674 ++++++
 README.md                                          |   79 +
 configure                                          |  320 +++
 docs/README.md                                     |    8 +
 docs/developerGuide/ID.md                          |   32 +
 docs/developerGuide/README.md                      |  231 ++
 docs/developerGuide/alignment.md                   |   43 +
 docs/developerGuide/breakendGraph.md               |   32 +
 docs/developerGuide/debugFullRun.md                |   46 +
 docs/developerGuide/debugSingleSV.md               |  123 ++
 docs/developerGuide/testAssembler.md               |   24 +
 docs/methods/README.md                             |   14 +
 .../primary/figure_data/jumpstate/jumpstate.dot    |   24 +
 .../primary/figure_data/jumpstate/makeit.bash      |    2 +
 .../primary/figure_data/workflow/workflow.pptx     |  Bin 0 -> 38913 bytes
 docs/methods/primary/figures/jumpstate.eps         |  563 +++++
 docs/methods/primary/figures/workflow.eps          | 2251 ++++++++++++++++++++
 docs/methods/primary/makepdf.bash                  |   40 +
 docs/methods/primary/methods.bib                   |   53 +
 docs/methods/primary/methods.tex                   |  519 +++++
 docs/methods/primary/packages/algorithm.sty        |  100 +
 docs/methods/primary/packages/algorithmicx.sty     |  786 +++++++
 docs/methods/primary/packages/algpseudocode.sty    |   92 +
 docs/userGuide/README.md                           |  679 ++++++
 docs/userGuide/installation.md                     |  166 ++
 redist/CMakeLists.txt                              |  216 ++
 redist/README.txt                                  |   11 +
 .../GetGitRevisionDescription.cmake                |  128 ++
 .../GetGitRevisionDescription.cmake.in             |   38 +
 redist/cmake-modules-c99fd3/LICENSE_1_0.txt        |   23 +
 redist/cmake-modules-c99fd3/README.markdown        |  107 +
 src/CMakeLists.txt                                 |   41 +
 src/c++/CMakeLists.txt                             |  106 +
 src/c++/Doxyfile.in                                | 1522 +++++++++++++
 src/c++/README.cxx.coordinates.txt                 |    8 +
 src/c++/bin/CMakeLists.txt                         |   44 +
 src/c++/bin/CheckSVLoci.cpp                        |   28 +
 src/c++/bin/DumpSVLoci.cpp                         |   28 +
 src/c++/bin/EstimateSVLoci.cpp                     |   28 +
 src/c++/bin/GenerateSVCandidates.cpp               |   28 +
 src/c++/bin/GetAlignmentStats.cpp                  |   28 +
 src/c++/bin/GetChromDepth.cpp                      |   28 +
 src/c++/bin/MergeAlignmentStats.cpp                |   28 +
 src/c++/bin/MergeEdgeStats.cpp                     |   28 +
 src/c++/bin/MergeSVLoci.cpp                        |   29 +
 src/c++/bin/SummarizeAlignmentStats.cpp            |   28 +
 src/c++/bin/SummarizeSVLoci.cpp                    |   28 +
 src/c++/bin/TestAssembler.cpp                      |   28 +
 src/c++/lib/CMakeLists.txt                         |   53 +
 src/c++/lib/README.txt                             |   35 +
 src/c++/lib/alignment/AlignerBase.hh               |  119 ++
 src/c++/lib/alignment/AlignerBaseImpl.hh           |  215 ++
 src/c++/lib/alignment/AlignerUtil.hh               |   87 +
 src/c++/lib/alignment/Alignment.cpp                |   33 +
 src/c++/lib/alignment/Alignment.hh                 |  112 +
 src/c++/lib/alignment/AlignmentScores.hh           |   48 +
 src/c++/lib/alignment/AlignmentUtil.cpp            |  199 ++
 src/c++/lib/alignment/AlignmentUtil.hh             |  138 ++
 src/c++/lib/alignment/CMakeLists.txt               |   20 +
 src/c++/lib/alignment/GlobalAligner.hh             |  124 ++
 src/c++/lib/alignment/GlobalAlignerImpl.hh         |  210 ++
 src/c++/lib/alignment/GlobalJumpAligner.hh         |  140 ++
 src/c++/lib/alignment/GlobalJumpAlignerImpl.hh     |  340 +++
 src/c++/lib/alignment/GlobalJumpIntronAligner.hh   |  150 ++
 .../lib/alignment/GlobalJumpIntronAlignerImpl.hh   |  450 ++++
 src/c++/lib/alignment/GlobalLargeIndelAligner.hh   |  183 ++
 .../lib/alignment/GlobalLargeIndelAlignerImpl.hh   |  267 +++
 src/c++/lib/alignment/JumpAlignerBase.hh           |  167 ++
 src/c++/lib/alignment/JumpAlignerBaseImpl.hh       |  255 +++
 src/c++/lib/alignment/ReadScorer.cpp               |   89 +
 src/c++/lib/alignment/ReadScorer.hh                |   76 +
 src/c++/lib/alignment/SingleRefAlignerShared.hh    |   96 +
 .../lib/alignment/SingleRefAlignerSharedImpl.hh    |  171 ++
 src/c++/lib/alignment/test/AlignmentUtilTest.cpp   |   60 +
 src/c++/lib/alignment/test/CMakeLists.txt          |   29 +
 src/c++/lib/alignment/test/GlobalAlignerTest.cpp   |  227 ++
 .../lib/alignment/test/GlobalJumpAlignerTest.cpp   |  351 +++
 .../alignment/test/GlobalJumpIntronAlignerTest.cpp |  319 +++
 .../alignment/test/GlobalLargeIndelAlignerTest.cpp |  239 +++
 src/c++/lib/alignment/test/test_main.cpp           |   23 +
 src/c++/lib/applications/CMakeLists.txt            |   24 +
 .../lib/applications/CheckSVLoci/CMakeLists.txt    |   20 +
 .../lib/applications/CheckSVLoci/CSLOptions.cpp    |   95 +
 src/c++/lib/applications/CheckSVLoci/CSLOptions.hh |   46 +
 .../lib/applications/CheckSVLoci/CheckSVLoci.cpp   |   53 +
 .../lib/applications/CheckSVLoci/CheckSVLoci.hh    |   42 +
 src/c++/lib/applications/DumpSVLoci/CMakeLists.txt |   20 +
 src/c++/lib/applications/DumpSVLoci/DSLOptions.cpp |  111 +
 src/c++/lib/applications/DumpSVLoci/DSLOptions.hh  |   50 +
 src/c++/lib/applications/DumpSVLoci/DumpSVLoci.cpp |   94 +
 src/c++/lib/applications/DumpSVLoci/DumpSVLoci.hh  |   42 +
 .../lib/applications/EstimateSVLoci/CMakeLists.txt |   20 +
 .../lib/applications/EstimateSVLoci/ESLOptions.cpp |  170 ++
 .../lib/applications/EstimateSVLoci/ESLOptions.hh  |   61 +
 .../applications/EstimateSVLoci/EstimateSVLoci.cpp |  198 ++
 .../applications/EstimateSVLoci/EstimateSVLoci.hh  |   42 +
 .../EstimateSVLoci/SVLocusSetFinder.cpp            |  294 +++
 .../EstimateSVLoci/SVLocusSetFinder.hh             |  130 ++
 .../GenerateSVCandidates/CMakeLists.txt            |   20 +
 .../GenerateSVCandidates/EdgeOptions.hh            |   49 +
 .../GenerateSVCandidates/EdgeOptionsParser.cpp     |  114 +
 .../GenerateSVCandidates/EdgeOptionsParser.hh      |   47 +
 .../GenerateSVCandidates/EdgeRetriever.hh          |   60 +
 .../GenerateSVCandidates/EdgeRetrieverBin.cpp      |  246 +++
 .../GenerateSVCandidates/EdgeRetrieverBin.hh       |   66 +
 .../GenerateSVCandidates/EdgeRetrieverJumpBin.cpp  |  163 ++
 .../GenerateSVCandidates/EdgeRetrieverJumpBin.hh   |   70 +
 .../GenerateSVCandidates/EdgeRetrieverLocus.cpp    |  150 ++
 .../GenerateSVCandidates/EdgeRetrieverLocus.hh     |   54 +
 .../GenerateSVCandidates/EdgeRuntimeTracker.cpp    |   93 +
 .../GenerateSVCandidates/EdgeRuntimeTracker.hh     |   96 +
 .../GenerateSVCandidates/FatSVCandidate.cpp        |   45 +
 .../GenerateSVCandidates/FatSVCandidate.hh         |  116 +
 .../GenerateSVCandidates/GSCEdgeStatsManager.cpp   |   64 +
 .../GenerateSVCandidates/GSCEdgeStatsManager.hh    |  138 ++
 .../GenerateSVCandidates/GSCOptions.cpp            |  222 ++
 .../GenerateSVCandidates/GSCOptions.hh             |   88 +
 .../GenerateSVCandidates/GenerateSVCandidates.cpp  |  297 +++
 .../GenerateSVCandidates/GenerateSVCandidates.hh   |   42 +
 .../GenerateSVCandidates/JunctionCallInfo.hh       |   91 +
 .../SVCandidateAssemblyRefiner.cpp                 | 2194 +++++++++++++++++++
 .../SVCandidateAssemblyRefiner.hh                  |  102 +
 .../GenerateSVCandidates/SVCandidateProcessor.cpp  |  574 +++++
 .../GenerateSVCandidates/SVCandidateProcessor.hh   |  124 ++
 .../GenerateSVCandidates/SVEvidence.cpp            |  102 +
 .../GenerateSVCandidates/SVEvidence.hh             |  328 +++
 .../applications/GenerateSVCandidates/SVFinder.cpp | 1360 ++++++++++++
 .../applications/GenerateSVCandidates/SVFinder.hh  |  157 ++
 .../SVScorePairAltProcessor.cpp                    |  596 ++++++
 .../SVScorePairAltProcessor.hh                     |  137 ++
 .../GenerateSVCandidates/SVScorePairProcessor.cpp  |   91 +
 .../GenerateSVCandidates/SVScorePairProcessor.hh   |  180 ++
 .../SVScorePairRefProcessor.cpp                    |  107 +
 .../SVScorePairRefProcessor.hh                     |   47 +
 .../applications/GenerateSVCandidates/SVScorer.cpp | 1952 +++++++++++++++++
 .../applications/GenerateSVCandidates/SVScorer.hh  |  292 +++
 .../GenerateSVCandidates/SVScorerPair.cpp          |  668 ++++++
 .../GenerateSVCandidates/SVScorerPairOptions.hh    |   46 +
 .../GenerateSVCandidates/SVScorerShared.cpp        |   44 +
 .../GenerateSVCandidates/SVScorerShared.hh         |   52 +
 .../GenerateSVCandidates/SVScorerSplit.cpp         |  439 ++++
 .../GenerateSVCandidates/SVSupports.cpp            |  201 ++
 .../GenerateSVCandidates/SVSupports.hh             |  219 ++
 .../GenerateSVCandidates/SplitReadAlignment.cpp    |  399 ++++
 .../GenerateSVCandidates/SplitReadAlignment.hh     |   88 +
 .../GenerateSVCandidates/test/CMakeLists.txt       |   29 +
 .../test/EdgeRetrieverBinTest.cpp                  |  285 +++
 .../test/EdgeRetrieverJumpBinTest.cpp              |  289 +++
 .../test/EdgeRetrieverLocusTest.cpp                |   69 +
 .../test/SVCandidateAssemblyRefinerTest.cpp        |   75 +
 .../GenerateSVCandidates/test/test_main.cpp        |   23 +
 .../GetAlignmentStats/AlignmentStatsOptions.cpp    |   98 +
 .../GetAlignmentStats/AlignmentStatsOptions.hh     |   38 +
 .../applications/GetAlignmentStats/CMakeLists.txt  |   20 +
 .../GetAlignmentStats/GetAlignmentStats.cpp        |   66 +
 .../GetAlignmentStats/GetAlignmentStats.hh         |   41 +
 .../lib/applications/GetChromDepth/CMakeLists.txt  |   20 +
 .../GetChromDepth/ChromDepthOptions.cpp            |  126 ++
 .../GetChromDepth/ChromDepthOptions.hh             |   41 +
 .../applications/GetChromDepth/GetChromDepth.cpp   |   69 +
 .../applications/GetChromDepth/GetChromDepth.hh    |   41 +
 .../MergeAlignmentStats/CMakeLists.txt             |   20 +
 .../MergeAlignmentStats/MergeAlignmentStats.cpp    |   64 +
 .../MergeAlignmentStats/MergeAlignmentStats.hh     |   41 +
 .../MergeAlignmentStatsOptions.cpp                 |  136 ++
 .../MergeAlignmentStatsOptions.hh                  |   40 +
 .../lib/applications/MergeEdgeStats/CMakeLists.txt |   20 +
 .../lib/applications/MergeEdgeStats/MESOptions.cpp |  153 ++
 .../lib/applications/MergeEdgeStats/MESOptions.hh  |   46 +
 .../applications/MergeEdgeStats/MergeEdgeStats.cpp |   82 +
 .../applications/MergeEdgeStats/MergeEdgeStats.hh  |   42 +
 .../lib/applications/MergeSVLoci/CMakeLists.txt    |   20 +
 .../lib/applications/MergeSVLoci/MSLOptions.cpp    |  151 ++
 src/c++/lib/applications/MergeSVLoci/MSLOptions.hh |   49 +
 .../lib/applications/MergeSVLoci/MergeSVLoci.cpp   |   91 +
 .../lib/applications/MergeSVLoci/MergeSVLoci.hh    |   43 +
 .../SummarizeAlignmentStats/CMakeLists.txt         |   20 +
 .../SummarizeAlignmentStats/SASOptions.cpp         |  112 +
 .../SummarizeAlignmentStats/SASOptions.hh          |   43 +
 .../SummarizeAlignmentStats.cpp                    |   81 +
 .../SummarizeAlignmentStats.hh                     |   42 +
 .../applications/SummarizeSVLoci/CMakeLists.txt    |   20 +
 .../applications/SummarizeSVLoci/SSLOptions.cpp    |  102 +
 .../lib/applications/SummarizeSVLoci/SSLOptions.hh |   43 +
 .../SummarizeSVLoci/SummarizeSVLoci.cpp            |   66 +
 .../SummarizeSVLoci/SummarizeSVLoci.hh             |   42 +
 .../lib/applications/TestAssembler/CMakeLists.txt  |   20 +
 .../applications/TestAssembler/TestAssembler.cpp   |   77 +
 .../applications/TestAssembler/TestAssembler.hh    |   51 +
 .../TestAssembler/TestAssemblerOptions.cpp         |   98 +
 .../TestAssembler/TestAssemblerOptions.hh          |   39 +
 .../TestAssembler/extractAssemblyReads.cpp         |   86 +
 .../TestAssembler/extractAssemblyReads.hh          |   45 +
 src/c++/lib/appstats/CMakeLists.txt                |   20 +
 src/c++/lib/appstats/GSCEdgeStats.cpp              |  191 ++
 src/c++/lib/appstats/GSCEdgeStats.hh               |  223 ++
 src/c++/lib/appstats/SVFinderStats.cpp             |   39 +
 src/c++/lib/appstats/SVFinderStats.hh              |   72 +
 src/c++/lib/assembly/AssembledContig.cpp           |   42 +
 src/c++/lib/assembly/AssembledContig.hh            |   60 +
 src/c++/lib/assembly/AssemblyReadInfo.hh           |   44 +
 src/c++/lib/assembly/CMakeLists.txt                |   20 +
 src/c++/lib/assembly/IterativeAssembler.cpp        |  927 ++++++++
 src/c++/lib/assembly/IterativeAssembler.hh         |   48 +
 src/c++/lib/assembly/SmallAssembler.cpp            |  734 +++++++
 src/c++/lib/assembly/SmallAssembler.hh             |   47 +
 src/c++/lib/assembly/test/CMakeLists.txt           |   29 +
 .../lib/assembly/test/IterativeAssemblerTest.cpp   |  133 ++
 src/c++/lib/assembly/test/SmallAssemblerTest.cpp   |  158 ++
 src/c++/lib/assembly/test/test_main.cpp            |   23 +
 src/c++/lib/blt_util/CMakeLists.txt                |   28 +
 src/c++/lib/blt_util/CircularCounter.hh            |  129 ++
 src/c++/lib/blt_util/LinearScaler.hh               |   71 +
 src/c++/lib/blt_util/MedianDepthTracker.hh         |   93 +
 src/c++/lib/blt_util/PolymorphicObject.hh          |   48 +
 src/c++/lib/blt_util/RangeMap.hh                   |  343 +++
 src/c++/lib/blt_util/ReadKey.cpp                   |   39 +
 src/c++/lib/blt_util/ReadKey.hh                    |  120 ++
 src/c++/lib/blt_util/RegionSum.hh                  |   72 +
 src/c++/lib/blt_util/RegionTracker.cpp             |  118 +
 src/c++/lib/blt_util/RegionTracker.hh              |  222 ++
 src/c++/lib/blt_util/RegionTrackerImpl.hh          |  173 ++
 src/c++/lib/blt_util/SampleVector.hh               |   65 +
 src/c++/lib/blt_util/SampleVectorImpl.hh           |   51 +
 src/c++/lib/blt_util/SimpleAlignment.cpp           |  139 ++
 src/c++/lib/blt_util/SimpleAlignment.hh            |   75 +
 src/c++/lib/blt_util/SizeDistribution.cpp          |  231 ++
 src/c++/lib/blt_util/SizeDistribution.hh           |  167 ++
 src/c++/lib/blt_util/align_path.cpp                | 1025 +++++++++
 src/c++/lib/blt_util/align_path.hh                 |  560 +++++
 src/c++/lib/blt_util/align_path_impl.hh            |   89 +
 .../lib/blt_util/align_path_match_descriptor.cpp   |  420 ++++
 .../lib/blt_util/align_path_match_descriptor.hh    |   53 +
 src/c++/lib/blt_util/align_path_util.hh            |  108 +
 src/c++/lib/blt_util/basic_matrix.hh               |  105 +
 src/c++/lib/blt_util/binomial_test.cpp             |  194 ++
 src/c++/lib/blt_util/binomial_test.hh              |  105 +
 src/c++/lib/blt_util/blt_exception.cpp             |   46 +
 src/c++/lib/blt_util/blt_exception.hh              |   44 +
 src/c++/lib/blt_util/blt_types.hh                  |   32 +
 src/c++/lib/blt_util/chrom_depth_map.cpp           |  102 +
 src/c++/lib/blt_util/chrom_depth_map.hh            |   37 +
 src/c++/lib/blt_util/compat_unistd.h               |   52 +
 src/c++/lib/blt_util/compat_util.cpp               |   84 +
 src/c++/lib/blt_util/compat_util.hh                |   54 +
 src/c++/lib/blt_util/compat_util_win32_realpath.c  |   95 +
 src/c++/lib/blt_util/depth_buffer.hh               |  168 ++
 src/c++/lib/blt_util/depth_buffer_util.cpp         |   50 +
 src/c++/lib/blt_util/depth_buffer_util.hh          |   37 +
 src/c++/lib/blt_util/flyweight_observer.hh         |   67 +
 src/c++/lib/blt_util/id_map.hh                     |  228 ++
 src/c++/lib/blt_util/input_stream_handler.cpp      |  196 ++
 src/c++/lib/blt_util/input_stream_handler.hh       |  169 ++
 src/c++/lib/blt_util/io_util.cpp                   |   67 +
 src/c++/lib/blt_util/io_util.hh                    |   52 +
 src/c++/lib/blt_util/istream_line_splitter.cpp     |  161 ++
 src/c++/lib/blt_util/istream_line_splitter.hh      |  130 ++
 src/c++/lib/blt_util/known_pos_range2.cpp          |   41 +
 src/c++/lib/blt_util/known_pos_range2.hh           |  224 ++
 src/c++/lib/blt_util/log.cpp                       |   30 +
 src/c++/lib/blt_util/log.hh                        |   29 +
 src/c++/lib/blt_util/math_util.hh                  |  127 ++
 src/c++/lib/blt_util/observer.hh                   |  168 ++
 src/c++/lib/blt_util/parse_util.cpp                |  267 +++
 src/c++/lib/blt_util/parse_util.hh                 |  152 ++
 src/c++/lib/blt_util/pos_processor_base.hh         |   58 +
 src/c++/lib/blt_util/pos_range.cpp                 |   55 +
 src/c++/lib/blt_util/pos_range.hh                  |  209 ++
 src/c++/lib/blt_util/prob_util.cpp                 |   55 +
 src/c++/lib/blt_util/prob_util.hh                  |  346 +++
 src/c++/lib/blt_util/qscore.hh                     |  124 ++
 src/c++/lib/blt_util/qscore_cache.cpp              |   76 +
 src/c++/lib/blt_util/qscore_cache.hh               |  140 ++
 src/c++/lib/blt_util/qscore_snp.cpp                |   47 +
 src/c++/lib/blt_util/qscore_snp.hh                 |   65 +
 src/c++/lib/blt_util/reference_contig_segment.hh   |  114 +
 src/c++/lib/blt_util/seq_printer.cpp               |   55 +
 src/c++/lib/blt_util/seq_printer.hh                |   45 +
 src/c++/lib/blt_util/seq_util.cpp                  |  175 ++
 src/c++/lib/blt_util/seq_util.hh                   |  324 +++
 src/c++/lib/blt_util/set_util.hh                   |   60 +
 src/c++/lib/blt_util/sig_handler.cpp               |   73 +
 src/c++/lib/blt_util/sig_handler.hh                |   31 +
 src/c++/lib/blt_util/stage_manager.cpp             |  371 ++++
 src/c++/lib/blt_util/stage_manager.hh              |  252 +++
 src/c++/lib/blt_util/stat_util.cpp                 |   65 +
 src/c++/lib/blt_util/stat_util.hh                  |   38 +
 src/c++/lib/blt_util/stream_stat.cpp               |   38 +
 src/c++/lib/blt_util/stream_stat.hh                |  119 ++
 src/c++/lib/blt_util/string_util.cpp               |  114 +
 src/c++/lib/blt_util/string_util.hh                |   57 +
 src/c++/lib/blt_util/test/CMakeLists.txt           |   28 +
 src/c++/lib/blt_util/test/CircularCounter_test.cpp |   46 +
 .../lib/blt_util/test/MedianDepthTracker_test.cpp  |  115 +
 src/c++/lib/blt_util/test/RangeMap_test.cpp        |  147 ++
 src/c++/lib/blt_util/test/RegionSum_test.cpp       |   38 +
 src/c++/lib/blt_util/test/RegionTracker_test.cpp   |  306 +++
 src/c++/lib/blt_util/test/SampleVector_test.cpp    |   47 +
 src/c++/lib/blt_util/test/SimpleAlignment_test.cpp |   62 +
 .../lib/blt_util/test/SizeDistribution_test.cpp    |  117 +
 src/c++/lib/blt_util/test/align_path_test.cpp      |  222 ++
 src/c++/lib/blt_util/test/binomial_test_test.cpp   |  449 ++++
 src/c++/lib/blt_util/test/boost_icl_test.cpp       |  152 ++
 src/c++/lib/blt_util/test/compat_util_test.cpp     |   83 +
 src/c++/lib/blt_util/test/depth_buffer_test.cpp    |  118 +
 src/c++/lib/blt_util/test/id_map_test.cpp          |   70 +
 src/c++/lib/blt_util/test/io_util_test.cpp         |   61 +
 .../blt_util/test/istream_line_splitter_test.cpp   |   94 +
 .../lib/blt_util/test/known_pos_range2_test.cpp    |   92 +
 src/c++/lib/blt_util/test/math_util_test.cpp       |   83 +
 src/c++/lib/blt_util/test/observer_test.cpp        |  139 ++
 src/c++/lib/blt_util/test/parse_util_test.cpp      |  251 +++
 src/c++/lib/blt_util/test/pos_range_test.cpp       |   96 +
 src/c++/lib/blt_util/test/prob_util_test.cpp       |  151 ++
 src/c++/lib/blt_util/test/seq_util_test.cpp        |   62 +
 src/c++/lib/blt_util/test/set_util_test.cpp        |   54 +
 src/c++/lib/blt_util/test/stage_manager_test.cpp   |  177 ++
 src/c++/lib/blt_util/test/stream_stat_test.cpp     |   48 +
 src/c++/lib/blt_util/test/string_util_test.cpp     |   90 +
 src/c++/lib/blt_util/test/test_main.cpp            |   23 +
 src/c++/lib/blt_util/test/window_util_test.cpp     |   73 +
 src/c++/lib/blt_util/thirdparty_pop.h              |   28 +
 src/c++/lib/blt_util/thirdparty_push.h             |   30 +
 src/c++/lib/blt_util/time_util.cpp                 |   52 +
 src/c++/lib/blt_util/time_util.hh                  |  202 ++
 src/c++/lib/blt_util/window_util.hh                |  128 ++
 src/c++/lib/common/CMakeLists.txt                  |   20 +
 src/c++/lib/common/Exceptions.cpp                  |  116 +
 src/c++/lib/common/Exceptions.hh                   |  230 ++
 src/c++/lib/common/OutStream.cpp                   |   78 +
 src/c++/lib/common/OutStream.hh                    |   63 +
 src/c++/lib/common/Program.cpp                     |  150 ++
 src/c++/lib/common/Program.hh                      |   68 +
 src/c++/lib/common/ProgramConfig.hh                |   58 +
 src/c++/lib/common/ProgramUtil.cpp                 |   51 +
 src/c++/lib/common/ProgramUtil.hh                  |   40 +
 src/c++/lib/common/ReadPairOrient.cpp              |   39 +
 src/c++/lib/common/ReadPairOrient.hh               |  147 ++
 src/c++/lib/common/config.h.in                     |  127 ++
 src/c++/lib/common/configBuildTimeInfo.h.in        |   29 +
 src/c++/lib/common/test/CMakeLists.txt             |   28 +
 src/c++/lib/common/test/ReadPairOrientTest.cpp     |   82 +
 src/c++/lib/common/test/test_main.cpp              |   23 +
 src/c++/lib/format/CMakeLists.txt                  |   20 +
 src/c++/lib/format/VcfWriterCandidateSV.cpp        |   84 +
 src/c++/lib/format/VcfWriterCandidateSV.hh         |   62 +
 src/c++/lib/format/VcfWriterDiploidSV.cpp          |  275 +++
 src/c++/lib/format/VcfWriterDiploidSV.hh           |  113 +
 src/c++/lib/format/VcfWriterSV.cpp                 |  922 ++++++++
 src/c++/lib/format/VcfWriterSV.hh                  |  187 ++
 src/c++/lib/format/VcfWriterScoredSV.hh            |   58 +
 src/c++/lib/format/VcfWriterSomaticSV.cpp          |  163 ++
 src/c++/lib/format/VcfWriterSomaticSV.hh           |  111 +
 src/c++/lib/format/VcfWriterTumorSV.cpp            |  136 ++
 src/c++/lib/format/VcfWriterTumorSV.hh             |   96 +
 src/c++/lib/htsapi/CMakeLists.txt                  |   28 +
 src/c++/lib/htsapi/SimpleAlignment_bam_util.cpp    |   65 +
 src/c++/lib/htsapi/SimpleAlignment_bam_util.hh     |   45 +
 src/c++/lib/htsapi/align_path_bam_util.cpp         |   85 +
 src/c++/lib/htsapi/align_path_bam_util.hh          |   51 +
 src/c++/lib/htsapi/bam_dumper.cpp                  |   91 +
 src/c++/lib/htsapi/bam_dumper.hh                   |   52 +
 src/c++/lib/htsapi/bam_header_info.cpp             |   53 +
 src/c++/lib/htsapi/bam_header_info.hh              |  108 +
 src/c++/lib/htsapi/bam_header_util.cpp             |  217 ++
 src/c++/lib/htsapi/bam_header_util.hh              |   81 +
 src/c++/lib/htsapi/bam_record.cpp                  |  130 ++
 src/c++/lib/htsapi/bam_record.hh                   |  372 ++++
 src/c++/lib/htsapi/bam_record_util.cpp             |  141 ++
 src/c++/lib/htsapi/bam_record_util.hh              |   98 +
 src/c++/lib/htsapi/bam_seq.cpp                     |   42 +
 src/c++/lib/htsapi/bam_seq.hh                      |  316 +++
 src/c++/lib/htsapi/bam_seq_read_util.cpp           |   69 +
 src/c++/lib/htsapi/bam_seq_read_util.hh            |   35 +
 src/c++/lib/htsapi/bam_streamer.cpp                |  297 +++
 src/c++/lib/htsapi/bam_streamer.hh                 |  118 +
 src/c++/lib/htsapi/bam_util.cpp                    |  177 ++
 src/c++/lib/htsapi/bam_util.hh                     |  156 ++
 src/c++/lib/htsapi/bed_record.cpp                  |   94 +
 src/c++/lib/htsapi/bed_record.hh                   |   66 +
 src/c++/lib/htsapi/bed_streamer.cpp                |   87 +
 src/c++/lib/htsapi/bed_streamer.hh                 |   54 +
 src/c++/lib/htsapi/hts_streamer.cpp                |  120 ++
 src/c++/lib/htsapi/hts_streamer.hh                 |   71 +
 src/c++/lib/htsapi/sam_util.hh                     |   34 +
 src/c++/lib/htsapi/samtools_fasta_util.cpp         |  171 ++
 src/c++/lib/htsapi/samtools_fasta_util.hh          |   74 +
 src/c++/lib/htsapi/tabix_util.hh                   |   36 +
 src/c++/lib/htsapi/test/CMakeLists.txt             |   28 +
 .../lib/htsapi/test/align_path_bam_util_test.cpp   |   49 +
 src/c++/lib/htsapi/test/bam_header_util_test.cpp   |   48 +
 src/c++/lib/htsapi/test/bed_streamer_test.bed.gz   |  Bin 0 -> 85 bytes
 .../lib/htsapi/test/bed_streamer_test.bed.gz.tbi   |  Bin 0 -> 120 bytes
 src/c++/lib/htsapi/test/bed_streamer_test.cpp      |   70 +
 src/c++/lib/htsapi/test/test_config.h.in           |   23 +
 src/c++/lib/htsapi/test/test_main.cpp              |   23 +
 src/c++/lib/htsapi/test/vcf_streamer_test.cpp      |  106 +
 src/c++/lib/htsapi/test/vcf_streamer_test.vcf.gz   |  Bin 0 -> 1126 bytes
 .../lib/htsapi/test/vcf_streamer_test.vcf.gz.tbi   |  Bin 0 -> 139 bytes
 src/c++/lib/htsapi/vcf_record.cpp                  |  201 ++
 src/c++/lib/htsapi/vcf_record.hh                   |  104 +
 src/c++/lib/htsapi/vcf_streamer.cpp                |  172 ++
 src/c++/lib/htsapi/vcf_streamer.hh                 |   63 +
 src/c++/lib/htsapi/vcf_util.cpp                    |  165 ++
 src/c++/lib/htsapi/vcf_util.hh                     |  120 ++
 src/c++/lib/manta/BamRegionProcessor.hh            |   58 +
 src/c++/lib/manta/CMakeLists.txt                   |   20 +
 src/c++/lib/manta/ChromDepthFilterUtil.cpp         |   64 +
 src/c++/lib/manta/ChromDepthFilterUtil.hh          |   63 +
 src/c++/lib/manta/EventInfo.hh                     |   44 +
 src/c++/lib/manta/JunctionIdGenerator.cpp          |   53 +
 src/c++/lib/manta/JunctionIdGenerator.hh           |   70 +
 src/c++/lib/manta/MultiJunctionUtil.cpp            |  415 ++++
 src/c++/lib/manta/MultiJunctionUtil.hh             |   39 +
 src/c++/lib/manta/ReadChromDepthUtil.cpp           |  503 +++++
 src/c++/lib/manta/ReadChromDepthUtil.hh            |   36 +
 src/c++/lib/manta/ReadGroupLabel.cpp               |   38 +
 src/c++/lib/manta/ReadGroupLabel.hh                |  115 +
 src/c++/lib/manta/ReadGroupStats.hh                |   53 +
 src/c++/lib/manta/ReadGroupStatsSet.cpp            |  147 ++
 src/c++/lib/manta/ReadGroupStatsSet.hh             |  115 +
 src/c++/lib/manta/ReadGroupStatsUtil.cpp           |  813 +++++++
 src/c++/lib/manta/ReadGroupStatsUtil.hh            |   35 +
 src/c++/lib/manta/RemoteMateReadUtil.cpp           |   64 +
 src/c++/lib/manta/RemoteMateReadUtil.hh            |   85 +
 src/c++/lib/manta/SVBreakend.cpp                   |   57 +
 src/c++/lib/manta/SVBreakend.hh                    |  404 ++++
 src/c++/lib/manta/SVCandidate.cpp                  |   65 +
 src/c++/lib/manta/SVCandidate.hh                   |  270 +++
 src/c++/lib/manta/SVCandidateAssembler.cpp         |  810 +++++++
 src/c++/lib/manta/SVCandidateAssembler.hh          |  129 ++
 src/c++/lib/manta/SVCandidateAssemblyData.cpp      |   38 +
 src/c++/lib/manta/SVCandidateAssemblyData.hh       |  181 ++
 src/c++/lib/manta/SVCandidateSetData.cpp           |  178 ++
 src/c++/lib/manta/SVCandidateSetData.hh            |  315 +++
 src/c++/lib/manta/SVCandidateUtil.cpp              |  140 ++
 src/c++/lib/manta/SVCandidateUtil.hh               |  196 ++
 src/c++/lib/manta/SVLocusEvidenceCount.hh          |   84 +
 src/c++/lib/manta/SVLocusScanner.cpp               | 1445 +++++++++++++
 src/c++/lib/manta/SVLocusScanner.hh                |  365 ++++
 src/c++/lib/manta/SVLocusScannerSemiAligned.cpp    |  340 +++
 src/c++/lib/manta/SVLocusScannerSemiAligned.hh     |   75 +
 src/c++/lib/manta/SVModelScoreInfo.hh              |   60 +
 src/c++/lib/manta/SVMultiJunctionCandidate.cpp     |   46 +
 src/c++/lib/manta/SVMultiJunctionCandidate.hh      |   51 +
 src/c++/lib/manta/SVMultiJunctionCandidateUtil.hh  |   40 +
 src/c++/lib/manta/SVReferenceUtil.cpp              |  238 +++
 src/c++/lib/manta/SVReferenceUtil.hh               |  108 +
 src/c++/lib/manta/SVScoreInfo.cpp                  |  268 +++
 src/c++/lib/manta/SVScoreInfo.hh                   |  197 ++
 src/c++/lib/manta/SVScoreInfoDiploid.cpp           |   75 +
 src/c++/lib/manta/SVScoreInfoDiploid.hh            |  185 ++
 src/c++/lib/manta/SVScoreInfoSomatic.cpp           |   43 +
 src/c++/lib/manta/SVScoreInfoSomatic.hh            |  181 ++
 src/c++/lib/manta/SVScoreInfoTumor.hh              |  142 ++
 src/c++/lib/manta/ShadowReadFinder.cpp             |  120 ++
 src/c++/lib/manta/ShadowReadFinder.hh              |  128 ++
 src/c++/lib/manta/test/CMakeLists.txt              |   29 +
 .../manta/test/SVLocusScannerSemiAlignedTest.cpp   |  176 ++
 src/c++/lib/manta/test/SVLocusScannerTest.cpp      |   63 +
 src/c++/lib/manta/test/test_main.cpp               |   23 +
 src/c++/lib/options/AlignmentFileOptions.hh        |   33 +
 src/c++/lib/options/AlignmentFileOptionsParser.cpp |  102 +
 src/c++/lib/options/AlignmentFileOptionsParser.hh  |   43 +
 src/c++/lib/options/CMakeLists.txt                 |   20 +
 src/c++/lib/options/CallOptionsDiploid.cpp         |   46 +
 src/c++/lib/options/CallOptionsDiploid.hh          |   61 +
 src/c++/lib/options/CallOptionsShared.hh           |   38 +
 src/c++/lib/options/CallOptionsSomatic.cpp         |   47 +
 src/c++/lib/options/CallOptionsSomatic.hh          |   59 +
 src/c++/lib/options/CallOptionsTumor.cpp           |   44 +
 src/c++/lib/options/CallOptionsTumor.hh            |   59 +
 src/c++/lib/options/IterativeAssemblerOptions.hh   |   65 +
 src/c++/lib/options/ReadScannerOptions.hh          |  111 +
 src/c++/lib/options/ReadScannerOptionsParser.cpp   |   64 +
 src/c++/lib/options/ReadScannerOptionsParser.hh    |   40 +
 src/c++/lib/options/SVLocusSetOptions.hh           |   61 +
 src/c++/lib/options/SVLocusSetOptionsParser.cpp    |   58 +
 src/c++/lib/options/SVLocusSetOptionsParser.hh     |   40 +
 src/c++/lib/options/SVRefinerOptions.hh            |   90 +
 src/c++/lib/options/SmallAssemblerOptions.hh       |   60 +
 src/c++/lib/options/optionsUtil.cpp                |   55 +
 src/c++/lib/options/optionsUtil.hh                 |   35 +
 src/c++/lib/svgraph/CMakeLists.txt                 |   20 +
 src/c++/lib/svgraph/EdgeInfo.cpp                   |   48 +
 src/c++/lib/svgraph/EdgeInfo.hh                    |   50 +
 src/c++/lib/svgraph/EdgeInfoUtil.cpp               |   65 +
 src/c++/lib/svgraph/EdgeInfoUtil.hh                |   50 +
 src/c++/lib/svgraph/GenomeInterval.cpp             |   36 +
 src/c++/lib/svgraph/GenomeInterval.hh              |   91 +
 src/c++/lib/svgraph/GenomeIntervalTracker.hh       |   64 +
 src/c++/lib/svgraph/GenomeIntervalUtil.cpp         |   74 +
 src/c++/lib/svgraph/GenomeIntervalUtil.hh          |   39 +
 src/c++/lib/svgraph/SVLocus.cpp                    |  639 ++++++
 src/c++/lib/svgraph/SVLocus.hh                     |  456 ++++
 src/c++/lib/svgraph/SVLocusNode.cpp                |   80 +
 src/c++/lib/svgraph/SVLocusNode.hh                 |  663 ++++++
 src/c++/lib/svgraph/SVLocusSampleCounts.cpp        |  121 ++
 src/c++/lib/svgraph/SVLocusSampleCounts.hh         |  226 ++
 src/c++/lib/svgraph/SVLocusSet.cpp                 | 1486 +++++++++++++
 src/c++/lib/svgraph/SVLocusSet.hh                  |  688 ++++++
 src/c++/lib/svgraph/test/CMakeLists.txt            |   28 +
 src/c++/lib/svgraph/test/GenomeIntervalTest.cpp    |   53 +
 .../lib/svgraph/test/GenomeIntervalUtilTest.cpp    |   58 +
 src/c++/lib/svgraph/test/SVLocusNodeTest.cpp       |   57 +
 src/c++/lib/svgraph/test/SVLocusSerializeTest.cpp  |  173 ++
 src/c++/lib/svgraph/test/SVLocusSetPrivateTest.cpp |  123 ++
 .../lib/svgraph/test/SVLocusSetSerializeTest.cpp   |  143 ++
 src/c++/lib/svgraph/test/SVLocusSetTest.cpp        | 1172 ++++++++++
 src/c++/lib/svgraph/test/SVLocusTest.cpp           |  169 ++
 src/c++/lib/svgraph/test/SVLocusTestUtil.hh        |   48 +
 src/c++/lib/svgraph/test/test_main.cpp             |   23 +
 src/cmake/boost.cmake                              |  256 +++
 src/cmake/bootstrap/common.bash                    |   47 +
 src/cmake/bootstrap/installCmake.bash              |   98 +
 src/cmake/buildTimeConfigure.cmake                 |   38 +
 src/cmake/cxxCommon.cmake                          |   31 +
 src/cmake/cxxConfigure.cmake                       |  511 +++++
 src/cmake/cxxExecutable.cmake                      |   31 +
 src/cmake/cxxLibrary.cmake                         |   76 +
 src/cmake/cxxTestLibrary.cmake                     |   72 +
 src/cmake/getBuildTimeConfigInfo.cmake             |   65 +
 src/cmake/globals.cmake                            |   47 +
 src/cmake/macros.cmake                             |  139 ++
 src/cmake/postInstall/CMakeLists.txt               |   26 +
 src/cmake/preInstall/CMakeLists.txt                |   29 +
 .../checkTargetPathsWritable/CMakeLists.txt        |   35 +
 .../checkTargetPathWritable.cmake                  |   47 +
 .../preInstall/copyrightAndChanges/CMakeLists.txt  |   29 +
 src/demo/CMakeLists.txt                            |   35 +
 src/demo/README.md                                 |   23 +
 src/demo/data/CMakeLists.txt                       |   39 +
 .../data/G15512.HCC1954.1.COST16011_region.bam     |  Bin 0 -> 845397 bytes
 .../data/G15512.HCC1954.1.COST16011_region.bam.bai |  Bin 0 -> 99136 bytes
 ...HCC1954.NORMAL.30x.compare.COST16011_region.bam |  Bin 0 -> 99059 bytes
 ...954.NORMAL.30x.compare.COST16011_region.bam.bai |  Bin 0 -> 99136 bytes
 ...Homo_sapiens_assembly19.COST16011_region.fa.fai |    2 +
 ..._sapiens_assembly19.COST16011_region.fa.tar.bz2 |  Bin 0 -> 2490 bytes
 src/demo/expectedResults/somaticSV.vcf.gz          |  Bin 0 -> 1525 bytes
 src/demo/runMantaWorkflowDemo.py                   |  176 ++
 src/python/CMakeLists.txt                          |   22 +
 src/python/bin/CMakeLists.txt                      |   35 +
 src/python/bin/configManta.py                      |  172 ++
 src/python/bin/configManta.py.ini                  |   36 +
 src/python/lib/CMakeLists.txt                      |   84 +
 src/python/lib/checkChromSet.py                    |  166 ++
 src/python/lib/configBuildTimeInfo.py              |   27 +
 src/python/lib/configureOptions.py                 |  246 +++
 src/python/lib/configureUtil.py                    |  305 +++
 src/python/lib/estimateHardware.py                 |  174 ++
 src/python/lib/makeRunScript.py                    |  290 +++
 src/python/lib/mantaOptions.py                     |  164 ++
 src/python/lib/mantaWorkflow.py                    |  757 +++++++
 src/python/lib/sharedWorkflow.py                   |  191 ++
 src/python/lib/workflowUtil.py                     |  378 ++++
 src/python/libexec/CMakeLists.txt                  |   31 +
 src/python/libexec/cat.py                          |   81 +
 src/python/libexec/denovo_scoring.py               |  190 ++
 src/python/libexec/extractSmallIndelCandidates.py  |  121 ++
 src/python/libexec/filterBam.py                    |  111 +
 src/python/libexec/mergeBam.py                     |   74 +
 src/python/libexec/mergeChromDepth.py              |   96 +
 src/python/libexec/ploidyFilter.py                 |  261 +++
 src/python/libexec/sortBam.py                      |   54 +
 src/python/libexec/sortEdgeLogs.py                 |  105 +
 src/python/libexec/sortVcf.py                      |  256 +++
 src/python/libexec/vcfCmdlineSwapper.py            |   56 +
 src/srcqc/README.txt                               |    2 +
 src/srcqc/check_for_nonascii_source.bash           |   97 +
 src/srcqc/run_cppcheck.py                          |  139 ++
 573 files changed, 83733 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..8f407a1
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,213 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Top level cmake file
+##
+## author Come Raczy
+##
+################################################################################
+
+if (WIN32)
+    cmake_minimum_required(VERSION 3.1.0)
+else ()
+    cmake_minimum_required(VERSION 2.8.0)
+endif ()
+
+message (STATUS "==== Initializing project cmake configuration ====")
+
+
+set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+enable_testing()
+
+
+# paths:
+set(THIS_REDIST_DIR "${CMAKE_SOURCE_DIR}/redist")
+set(THIS_SOURCE_DIR "${CMAKE_SOURCE_DIR}/src")
+set(THIS_MODULE_DIR "${THIS_SOURCE_DIR}/cmake")
+set(THIS_SOURCE_QC_DIR "${THIS_SOURCE_DIR}/srcqc")
+
+set (THIS_GLOBALS_CMAKE "${THIS_MODULE_DIR}/globals.cmake")
+set (THIS_MACROS_CMAKE "${THIS_MODULE_DIR}/macros.cmake")
+
+# add custom functions:
+include ("${THIS_MACROS_CMAKE}")
+
+#
+# if true, build with very strict error checking (disabled in src release tarballs)
+#
+set(DEVELOPER_MODE true)
+
+#
+# set WIN specific cache values
+#   - WIN user potentially interacts with cache via cmake-gui
+#   - Linux user interacts via the configure shell script
+#
+if (WIN32)
+    set(IS_MSVC_ANALYZE FALSE CACHE BOOL
+        "Turn on Visual Studio /analyze option")
+endif ()
+
+#
+# set up configuration types, and pick a build type from this set
+#
+# To translate the CMAKE-ese here:
+# The plural of "BUILD_TYPE" is not "BUILD_TYPES", it is "CONFIGURATION_TYPES"
+#
+set(CMAKE_CONFIGURATION_TYPES_TMP Debug Release RelWithDebInfo)
+if (NOT WIN32)
+    set(CMAKE_CONFIGURATION_TYPES_TMP ${CMAKE_CONFIGURATION_TYPES_TMP} ASan)
+endif ()
+set(CMAKE_CONFIGURATION_TYPES ${CMAKE_CONFIGURATION_TYPES_TMP} CACHE STRING "" FORCE)
+
+join(CMAKE_CONFIGURATION_TYPES ", " CMAKE_CONFIGURATION_TYPES_STRING)
+
+if (NOT CMAKE_BUILD_TYPE)
+    set(DEFAULT_CMAKE_BUILD_TYPE Release)
+    set(CMAKE_BUILD_TYPE ${DEFAULT_CMAKE_BUILD_TYPE} CACHE STRING
+        "Choose the type of build, options are: {${CMAKE_CONFIGURATION_TYPES_STRING}} (default: ${DEFAULT_CMAKE_BUILD_TYPE})")
+endif()
+
+# check that a valid build type has been selected:
+list(FIND CMAKE_CONFIGURATION_TYPES ${CMAKE_BUILD_TYPE} TINDEX)
+if(TINDEX EQUAL -1)
+    message (FATAL_ERROR "Selected build type, '${CMAKE_BUILD_TYPE}', is not in the set of available types: {${CMAKE_CONFIGURATION_TYPES_STRING}}")
+endif()
+
+message (STATUS "BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+
+#
+# other cache variables
+#
+
+if (NOT CMAKE_PARALLEL)
+    # set to true if win build needs to rebuild boost, right now on win32 all we do is unpack boost for headers:
+    set (IS_WIN_BOOST_BUILD FALSE)
+
+
+    if (WIN32 AND IS_WIN_BOOST_BUILD)
+        set(CMAKE_PARALLEL "0" CACHE STRING
+            "Choose the number of cores used by cmake during configuration (no default)")
+
+        # give windows/cmake-gui user the chance to set this before proceeding, otherwise
+        # we force a long wait for boost to compile
+        message (WARNING "\n*** Must set CMAKE_PARALLEL before continuing configuration ***\n")
+        return()
+    else ()
+        set (CMAKE_PARALLEL "1")
+    endif ()
+endif ()
+
+message (STATUS "CMAKE_PARALLEL: ${CMAKE_PARALLEL}")
+
+#
+# setup project/version
+#
+
+set(THIS_PROJECT_NAME "manta")
+
+project (${THIS_PROJECT_NAME})
+
+# find interpreters
+find_package(PythonInterp)
+if (NOT PYTHONINTERP_FOUND)
+    message (WARNING "No python interpreter found, disabling optional python build and installation components. Installed workflow requires python interpreter to run")
+endif()
+
+set (THIS_ARCH ${CMAKE_SYSTEM_PROCESSOR})
+if (NOT WIN32)
+    if (CMAKE_SIZEOF_VOID_P MATCHES 8)
+        set (THIS_ARCH "x86_64")
+    else ()
+        set (THIS_ARCH "x86")
+    endif ()
+endif ()
+message (STATUS "TARGET_ARCHITECTURE: " ${THIS_ARCH} )
+
+
+# Create package versioning target - version derived from git describe except for
+# release tarballs. Version file is treated as always out of date.
+set(THIS_BUILDTIME_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/buildTimeConfigInfo.txt")
+set(THIS_BUILDTIME_CONFIG_TARGET "${THIS_PROJECT_NAME}_buildtime_config")
+add_custom_target(
+    ${THIS_BUILDTIME_CONFIG_TARGET}
+    ALL
+    COMMAND ${CMAKE_COMMAND}
+    -D REDIST_DIR="${THIS_REDIST_DIR}"
+    -D CONFIG_FILE="${THIS_BUILDTIME_CONFIG_FILE}"
+    -D SRC_DIR="${CMAKE_CURRENT_SOURCE_DIR}"
+    -P "${THIS_MODULE_DIR}/getBuildTimeConfigInfo.cmake"
+    COMMENT "Updating buildtime config info")
+
+# Installation directories
+if    (NOT CMAKE_INSTALL_PREFIX)
+    set(CMAKE_INSTALL_PREFIX "/usr/local")
+endif ()
+
+# check_prefix(): if CMAKE_INSTALL_PREFIX does not begin with '/', register an install-time check that aborts when DESTDIR is non-empty
+function(check_prefix)
+    string(SUBSTRING "${CMAKE_INSTALL_PREFIX}" 0 1 slash)  # first character of the install prefix
+    string(COMPARE NOTEQUAL "/" "${slash}" isslash)  # NOTE: despite its name, isslash is TRUE when the prefix does NOT start with '/'
+    if    (isslash)
+        install(CODE "
+            string(LENGTH \"\$ENV{DESTDIR}\" ddlen)
+            if (ddlen)
+                message (FATAL_ERROR \"Installation prefix must begin with '/' if DESTDIR is set.\")
+            endif ()
+        ")
+    endif ()
+endfunction()
+
+check_prefix()
+
+# paths for binary components -- we don't currently change this from the non-binary components
+set(THIS_EXEC_PREFIX "${CMAKE_INSTALL_PREFIX}")
+set(THIS_BINDIR "${THIS_EXEC_PREFIX}/bin")
+set(THIS_LIBDIR "${THIS_EXEC_PREFIX}/lib")
+set(THIS_LIBEXECDIR "${THIS_EXEC_PREFIX}/libexec")
+
+# paths w/o architecture dependency
+set(THIS_PYTHON_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib/python")
+set(THIS_DATADIR "${CMAKE_INSTALL_PREFIX}/share")
+set(THIS_DOCDIR "${CMAKE_INSTALL_PREFIX}/doc")
+set(THIS_DEMODIR "${THIS_DATADIR}/demo")
+
+message (STATUS "install prefix: ${CMAKE_INSTALL_PREFIX}")
+
+set(THIS_OPT "${THIS_PROJECT_NAME}_opt")
+add_custom_target(${THIS_OPT} ALL)
+
+# redist includes
+include ("${THIS_MODULE_DIR}/boost.cmake")
+
+if (NOT WIN32)
+    # global source QC -- don't allow non-ascii chars in source files:
+    set(THIS_SOURCE_CHECK "${THIS_PROJECT_NAME}_source_check")
+    add_custom_target(${THIS_SOURCE_CHECK}
+        ALL
+        COMMAND bash ${THIS_SOURCE_QC_DIR}/check_for_nonascii_source.bash
+        )
+
+    # force this to run early:
+    add_dependencies(${THIS_OPT} ${THIS_SOURCE_CHECK})
+endif ()
+
+add_subdirectory (redist)
+add_subdirectory (src)
diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt
new file mode 100644
index 0000000..b289f3c
--- /dev/null
+++ b/COPYRIGHT.txt
@@ -0,0 +1,242 @@
+Manta - Structural Variant and Indel Caller
+Copyright (c) 2013-2016 Illumina, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+******************************************************************
+******************************************************************
+******************************************************************
+
+This distribution includes the following code libraries/external tools.
+These are distributed according to the licensing terms governing each
+library:
+
+
+******************************************************************
+
+htslib 1.2.1-204-g8197cfd
+
+[Files in this distribution outwith the cram/ subdirectory are distributed
+according to the terms of the following MIT/Expat license.]
+
+The MIT/Expat License
+
+Copyright (C) 2012-2014 Genome Research Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+
+[Files within the cram/ subdirectory in this distribution are distributed
+according to the terms of the following Modified 3-Clause BSD license.]
+
+The Modified-BSD License
+
+Copyright (C) 2012-2014 Genome Research Ltd.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the names Genome Research Ltd and Wellcome Trust Sanger Institute
+   nor the names of its contributors may be used to endorse or
+   promote products derived from this software without specific prior
+   written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR ITS CONTRIBUTORS BE
+LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+[The use of a range of years within a copyright notice in this distribution
+should be interpreted as being equivalent to a list of years including the
+first and last year specified and all consecutive years between them.
+
+For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009,
+2011-2012" should be interpreted as being identical to a notice that reads
+"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice
+that reads "Copyright (C) 2005-2012" should be interpreted as being identical
+to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012".]
+
+
+******************************************************************
+
+Boost 1.56.0
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+******************************************************************
+
+CMake 2.8.12
+
+CMake - Cross Platform Makefile Generator
+Copyright 2000-2011 Kitware, Inc., Insight Software Consortium
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+CMake - Cross Platform Makefile Generator
+Copyright 2000-2011 Kitware, Inc., Insight Software Consortium
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the names of Kitware, Inc., the Insight Software Consortium,
+  nor the names of their contributors may be used to endorse or promote
+  products derived from this software without specific prior written
+  permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+------------------------------------------------------------------------------
+
+The above copyright and license notice applies to distributions of
+CMake in source and binary form.  Some source files contain additional
+notices of original copyright by their contributors; see each source
+for details.  Third-party software packages supplied with CMake under
+compatible licenses provide their own copyright notices documented in
+corresponding subdirectories.
+
+
+******************************************************************
+
+pyFlow 1.1.12
+
+pyFlow - a lightweight parallel task engine
+
+Copyright (c) 2012-2015 Illumina, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+******************************************************************
+
+cmake-modules c99fd3
+
+Copyright Iowa State University 2009-2014,
+or Copyright Sensics, Inc. 2014-2015,
+or Copyright Ryan A. Pavlik 2009-2015
+
+Distributed under the Boost Software License, Version 1.0.
+
+(See accompanying file `LICENSE_1_0.txt` or copy at
+<http://www.boost.org/LICENSE_1_0.txt>)
+
+
+******************************************************************
+
+zlib 1.2.8
+
+ (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
diff --git a/ChangeLog.txt b/ChangeLog.txt
new file mode 100644
index 0000000..5ebcb81
--- /dev/null
+++ b/ChangeLog.txt
@@ -0,0 +1,288 @@
+v1.0.2
+- SPW-316 update denovo scoring script to write valid vcf when the input vcf contains more than three samples
+- Remove old/unused reference utility functions
+v1.0.1
+- MANTA-299 documentation and method cleanup for handling split read evidence with a couple of bug fixes
+- MANTA-296 Test assembler directly from bam input
+- [#43] fix freed string pointer use in sample name
+v1.0.0
+- [#36] support setting SGE task runtime limit
+- MANTA-290 add debug option to generate candidate SV evidence BAMs
+v0.29.7
+- Format change of de novo calls, moving DQ from INFO to FORMAT
+- [#40] improve robustness to user locale settings
+- close infrequent memory leak in hts_streamer
+- A number of changes in build and docs
+v0.29.6
+- MANTA-287 add the unit test of consistency of supporting reads
+- MANTA-287 fix a bug in the small assembler by checking consistency when adding a new supporting read of contig
+v0.29.5
+- [#32] Preserve file path softlinks so that sidecar index files can be found
+- Fix field name in denovo variant search script
+v0.29.4
+- Add a python script to identify de novo calls
+- MANTA-285 add option to keep all temp files to support workflow debug
+- MANTA-276 mv configure to top-level, mv guides to docs dir, add methods docs
+- MANTA-284 improve windows shell support by shortening very long cmdlines
+v0.29.3
+- MANTA-280 filter supplementary reads without SA tags and read pairs with unmatched mate information
+- STARKA-306 fix rare chunk size boundary defect in RangeMap
+v0.29.2
+- MANTA-277 fix invalid genome region requested during insert size estimation
+- Update to pyflow 1.1.12 to improve SGE filesystem delay handling
+and fix issue between SGE and recent bash shellshock fix
+v0.29.1
+- [#22] Add new manta developer guide to source docs
+- [#21] improve fragment size estimation for very short fragments
+- RNA: Improve fusion detection sensitivity
+- MANTA-261 Transfer stable components from Manta windows port
+- MANTA-273 fix support for "csi"-style BAM indices
+- MANTA-273/[#14] allow bam index filenames in single-extension (Picard) style
+v0.29.0
+- MANTA-267/[#12] Support contig names with colons (for HLA contigs in 1kg hg38)
+- MANTA-252 Complete support for CRAM input
+- MANTA-264 Remove samtools from manta dependencies
+- MANTA-252 Change default chrom depth to median estimate from alignments
+- MANTA-263 Improve performance/stability for references with
+large numbers of small contigs
+- MANTA-261 Transfer stable components from Manta windows port
+v0.28.0
+- MANTA-259 Support joint analysis of multiple diploid samples
+- MANTA-260 Add per-sample filtration to separate QUAL and GQ filters for
+diploid case
+- MANTA-252 Add fast chrom median depth estimator (partially enables CRAM)
+- MANTA-258 Add PL values to diploid output
+v0.27.2
+- MANTA-257 Fix rare failure condition for graph merge
+- MANTA-255 include zlib in build, simplify win64 development
+- MANTA-254 Fix handling of off-edge splicing in the RNA Jump Intron Aligner
+- MANTA-253 improve alignment corner cases and debugging features
+v0.27.1
+- [#6] Fix assertion caused by filtered graph edges on bin boundaries.
+- [#5] Improve robustness to filesystem delay (update to pyflow v1.1.7)
+v0.27.0
+- MANTA-188/[#4] fix off-by-one position issues in some precise duplication
+and inversion breakends
+- MANTA-229 Add initial support for tumor-only analysis
+v0.26.5
+- Update pyflow to v1.1.6: fixes multithread bug introduced in v1.1.5.
+Manta should be isolated from this issue in theory.
+v0.26.4
+- Update license to GPLv3
+- Update to relicensed pyflow v1.1.5
+- MANTA-244 Handle unstranded RNA data
+- MANTA-239 Use RNA bam alignments for ref read scoring
+- Fix core/memory auto-detect for OSX
+v0.26.3
+- Fix OSX build and demo run (req. update to boost 1.56)
+- Update travis CI OSX build, static analyzer
+v0.26.2
+- MANTA-242 cleanup code portability and documentation:
+-- Updated all build/installation and contributor guidelines
+-- Updated Manta user guide
+-- Added Travis CI configuration file for clang/gcc build and demo run
+-- Minor code edits to clean compile on OS X 10.9, CentOS 5,6,7, Ubuntu
+12.04 and 14.04
+-- Updated demo: new dataset covers COSMIC HCC1954 variants, added test
+to verify expected output from demo run.
+v0.26.1
+- Remove python reflection from run configuration process, fixes rare
+config issue
+- VCF output formatting: FileData corrected to FileDate
+v0.26.0
+- MANTA-224 improve short-fragment handling for RNA
+- MANTA-235 kmer reference mask to accelerate RNA contig alignments
+- MANTA-232 filter large SVs with no read pair support
+- MANTA-236 expand conditions for large insertion search to normalize
+BWA-mem/Isaac performance
+- MANTA-231 expand scoring phase split read search around breakends to
+find soft clipped ref and alt support.
+- MANTA-234 remove discovery pair counts from scored output files
+- MANTA-222 support bwa-mem '-M' split read format
+- MANTA-218 filter spanning candidates without significant signal/noise
+- MANTA-155 handle N's and lowqual bases during assembly
+- MANTA-219 Build system improvements for auditing, visual studio support
+- MANTA-217 Improve runtime for large min candidate size settings, creating
+a high speed large-event mode.
+v0.25.0
+- RNA - parameter adjustments, additional vcf output, orientation fixes
+- MANTA-187 treat split reads symmetrically to improve RNA fusion detection
+-- detect split reads directly rather than via associated soft-clip sequence
+-- exclude split reads from contributing to local assembly evidence
+v0.24.0
+- MANTA-213 FFPE runtime optimization:
+-- Recognize new BAM format for fragments shorter than read length,
+prevent these reads from triggering assembly.
+-- Improve insert size distribution estimation by including fragments shorter
+than read length.
+-- Add new runtime instrumentation report for candidate generation
+-- Add runtime summary to existing graph report
+-- Reduce SW edit matrix size with short k-mer match bounds on ref seq
+-- Improve graph noise filtration by testing specific region for evidence
+signal threshold
+-- Use adaptive noise rates in hypoth gen step: background read anomaly rate
+is used to determine if signal is significant. Currently applies to small
+assembly candidates only.
+-- Collapse redundant assembly candidates for small indels to single copy
+v0.23.1
+- MANTA-206 Disable remote read search in T/N analysis
+- MANTA-195 Improve filtration of short fragments in FFPE samples
+v0.23.0
+- MANTA-200 Add filter for overlapping diploid calls which can't be explained
+as two haplotypes.
+- MANTA-199 Fix low-frequency fragment/breakpoint mismatch (primarily
+an issue when large numbers of short ref contigs were used)
+- Add CRAM support for individual tools - still need a quick chrom depth estimator
+for full workflow CRAM support
+- c++11 update
+- MANTA-185 remove transloc calls with neg position (from circular genome)
+- [mantadev] #1 Fix SA split read breakpoint position
+- MANTA-183 handle paired/single read mixture in input alignments
+- MANTA-182 submit config on cmdline
+v0.22.0
+- MANTA-181 fix assembler path and coverage issues
+- MANTA-177 filter redundant partial insertions
+- MANTA-142 improve contig alignment for large events
+- MANTA-160 add pseudo-coloring to assembly, and improve multi-pass
+read/contig association
+- MANTA-170 improve pair allele support accuracy
+- MANTA-139 add shadow and chimera reads into pair counts
+v0.21.0
+- MANTA-167 semi-mapped som pair correction
+- MANTA-156 filter out assm poison reads
+- MANTA-161 make assembly robust to seed k-mer selection
+- MANTA-164 batch retrieve assembly mate reads
+- MANTA-158 improve small indel contig alignment specificity
+- MANTA-146 use MAPQ0 mate reads in assembly
+- MANTA-48 use shadow reads for split scoring
+- MANTA-157 improve shadow read filter
+- MANTA-153 fix diploid prior
+- MANTA-150 fix SV scoring size cutoff
+- MANTA-148 check bam records for region errors
+- RNA: rna-scoring
+v0.20.1
+- Lower default min candidate size to 8
+v0.20.0
+- MANTA-136 turn on conservative large insertion calling
+- MANTA-126 multi-junction SV scoring
+- MANTA-131 improve large somatic sv specificity with expanded
+supporting evidence search
+- RNA: track candidate orientation
+v0.19.0
+- MANTA-127 RG based insert stats (default off) 
+- MANTA-128 Improved pair orientation estimation and error checks
+- RNA: Improve fusion specificity
+- MANTA-125 add experimental large insertion calling (default off)
+- MANTA-125 add tier2 permissive split score to reduce small somatic
+FP deletions
+- MANTA-125 add tier2 chimera rate to reduce somatic FP calls 
+v0.18.0
+- MANTA-125 modify pair weight for small SVs
+- MANTA-120 Improve stability of SV scoring as a function of read length
+v0.17.0
+- Filter SA split read segments by MAPQ value
+- MANTA-116 better handle BWA-mem SA split-reads for inversions
+- MANTA-118 static libstdc++ for gcc 4.5+
+v0.16.0
+- MANTA-117 add somatic quality score
+- fix SA tag parsing
+- MANTA-27 accept bam/fasta filenames with spaces
+v0.15.0
+- MANTA-108 combine clip/semi-aligned evidence detection, don't detect
+overlapping reads as assembly evidence
+- MANTA-98 make fewer bam scans during scoring
+- MANTA-106 add high depth limit to candidate generation and assembler
+- MANTA-75 Better match reads to SV candidates to improve
+runtime and lower repeat observations (part 2)
+- MANTA-105 filter poorly supported candidates to reduce per-edge compute time
+- MANTA-103 fix issue in RNA and WES modes introduced by MANTA-99
+v0.14.0
+- MANTA-102 filter calls with high MQ0 fraction
+- MANTA-99 add high-depth graph filter to improve FFPE runtime
+- MANTA-100 allow for neighboring variants during assembly
+- MANTA-83 sort vcfs in bam chrom order
+- MANTA-96 Keep matching read pairs after candidate generation
+read buffer fills
+- MANTA-89 Use semi-mapped read pairs to improve germline/somatic
+classification.
+- MANTA-92 Add edge runtime performance log
+- MANTA-75 Better match reads to SV candidates to improve
+runtime and lower repeat observations
+- MANTA-85 Increase uniformity of tags in vcf output
+v0.13.2
+- First complete pass at installation and user guide
+v0.13.1
+- MANTA-81 Fix small indel somatic false negatives introduced
+in MANTA-63
+- MANTA-80 Additional workflow options: run subsections of the
+genome, finer task parallelization control and merge multiple
+input BAMs per sample.
+v0.13.0
+- MANTA-63 Incorporate read-pair evidence into small SVs/indels
+- MANTA-77 Fix assertion for rna-seq test
+- MANTA-17 Include semi-aligned reads in discovery and scoring
+- MANTA-69 Update score/write filter to account for CIGAR and SA-read
+candidates, and new uniform candidate scheme for self-edges.
+- MANTA-70 Correct filters to allow for small inversion and tandem dup
+detection 
+- MANTA-68 SVLEN not set correctly for non-deletions
+- MANTA-64 Improve candidate generation for small regions
+- MANTA-43 allow manta installation to be relocated
+- MANTA-55 compile python code as part of build/install
+v0.12.1
+- MANTA-58 fix issue with breakends near contig boundaries
+- MANTA-61 add markdown-based user guide to build
+- MANTA-30 initial integration of known variant tracing framework
+v0.12
+- MANTA-20 incorporate split-reads into quality score
+- MANTA-42 SV finder mismatches various read pair / sv-candidate combinations
+- MANTA-53 Enable --rescore option in runWorkflow.py
+- MANTA-40 Don't call splicing-events in RNA-seq as deletions
+- MANTA-20 include split read counts for short reads
+- MANTA-44 Fix Rhodobacter analysis
+v0.11
+- Adjust all vcf output to pass vcf-validator
+- MANTA-20 fix split read breakpoint location
+v0.10.1
+- Fix low-frequency assertion due to unexpected alignment pattern
+v0.10
+- MANTA-20 Limit split read counts to those uniquely supporting each allele,
+where P(allele|read)>0.999
+- MANTA-20 Add likelihood based QUAL,GQ scores to diploid output, adjust
+thresholds of somatic output to incorporate ref pairs and split reads.
+- MANTA-41 Fails when chrom name not in [a-zA-Z0-9_-]+
+- MANTA-25 Partial support for BWA-MEM SA split reads
+- MANTA-36 Segfault on RNA-Seq BAM input
+- MANTA-20 Combined reference spanning read and split read evidence per variant
+- MANTA-20 Diploid vcf output for non-tumor sample, diploid genotype inference score still todo
+- MANTA-39 prevent crash on large CIGAR deletions
+- MANTA-20 split read evidence counts for all large spanning SVs
+v0.9
+- MANTA-20 preliminary work on this branch allows assembly skip and control of min indel candidate size and min indel score size
+- MANTA-33 reduce SV graph ram requirement to ~1/3 of its previous value, increase all post-merge task memory requests.
+- MANTA-17 merged shadow reads into assembly and adjusted assembly parameters. Large (50+ base) insertion sensitivity improves by ~.35-.4 as a result.
+- Improvements to vcf output and cmake build.
+v0.8
+- MANTA-28 Add prototype discovery/local-assembly of small events down to 10 bases 
+- MANTA-24 Better handle very high depth and chimeric noise complexity based
+on BWA-mem FFPE examples
+- MANTA-26 Extend fragment stats to provide estimate of full fragment size
+distribution
+- Large event assembly fixes
+- MANTA-23 enable use of pre-existing depth and stats files (for sparse bams)
+v0.7
+- Add assembly of large-event breakends and basepair resolution SV reporting
+- MANTA-19 Correctly parse large deletion reads from Isaac and incorporate this into discovery
+v0.6
+- Fix sensitivity problems caused by unexpected proper pair bit settings, fix several self-edge issues. Detect intrachrom variants down to ~2kb.
+v0.5
+- Expand POC calls to include intrachromosomal variants down to ~5kb.
+- Minor modifications to method based on FFPE testing.
+v0.4
+- POC somatic transloc output
+v0.3
+- POC translation of graph into candidate transloc vcf
+v0.2
+- working proof of concept denoised sv locus graph
+v0.1
+- initial prototype code tag
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4432dba
--- /dev/null
+++ b/README.md
@@ -0,0 +1,79 @@
+Manta Structural Variant Caller
+===============================
+
+Manta calls structural variants (SVs) and indels from mapped
+paired-end sequencing reads. It is optimized for analysis of germline
+variation in small sets of individuals and somatic variation in
+tumor/normal sample pairs. Manta discovers, assembles and scores
+large-scale SVs, medium-sized indels and large insertions within a
+single efficient workflow. The method is designed for rapid analysis
+on standard compute hardware: NA12878 at 50x genomic coverage is
+analyzed in less than 20 minutes on a 20 core server, and most WGS
+tumor/normal analyses can be completed within 2 hours. Manta combines
+paired and split-read evidence during SV discovery and scoring to
+improve accuracy, but does not require split-reads or successful
+breakpoint assemblies to report a variant in cases where there is
+strong evidence otherwise. It provides scoring models for germline
+variants in small sets of diploid samples and somatic variants in
+matched tumor/normal sample pairs. There is experimental support for
+analysis of unmatched tumor samples as well. Manta accepts input read
+mappings from BAM or CRAM files and reports all SV and indel inferences
+in VCF 4.1 format. See the [user guide] [UserGuide] for a full
+description of capabilities and limitations.
+
+[UserGuide]:docs/userGuide/README.md
+
+Methods and benchmarking details are described in:
+
+Chen, X. *et al.* (2016) Manta: rapid detection of structural variants and
+indels for germline and cancer sequencing applications. *Bioinformatics*,
+32, 1220-1222. [doi:10.1093/bioinformatics/btv710][bpaper]
+
+...and the corresponding [open-access pre-print][preprint].
+
+[bpaper]:https://dx.doi.org/10.1093/bioinformatics/btv710
+[preprint]:http://dx.doi.org/10.1101/024232
+
+
+License
+-------
+
+Manta source code is provided under the [GPLv3 license](LICENSE.txt).
+Manta includes several third party packages provided under other
+open source licenses; please see [COPYRIGHT.txt](COPYRIGHT.txt)
+for additional details.
+
+
+Getting Started
+---------------
+
+For linux users, it is recommended to start from the most recent
+[binary distribution on the Manta releases page] [releases]. This
+distribution can be unpacked, moved to any convenient directory and
+tested by [running a small demo](docs/userGuide/installation.md#demo)
+included with the release distribution. Manta can also be installed
+and run on OS X. Please see the [installation instructions](docs/userGuide/installation.md)
+for full build and installation details of all supported cases.
+
+[releases]:https://github.com/Illumina/manta/releases
+
+
+Data Analysis and Interpretation
+--------------------------------
+
+After completing installation, see the [Manta user guide] [UserGuide]
+for instructions on how to run Manta, interpret results and estimate
+hardware requirements/compute cost, in addition to a high-level methods
+overview.
+
+
+Manta Code Development
+----------------------
+
+For manta code development and debugging details, see the
+[Manta developer guide] [DeveloperGuide]. This includes details
+on Manta's development protocols, special build instructions,
+recommended workflows for investigating
+calls, and internal documentation details.
+
+[DeveloperGuide]:docs/developerGuide/README.md
diff --git a/configure b/configure
new file mode 100755
index 0000000..1e9db38
--- /dev/null
+++ b/configure
@@ -0,0 +1,320 @@
+#!/usr/bin/env bash
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+
+#
+# Top level configuration file for *nix like OS's, note that there
+# is legacy support for both cygwin and minGW in here but neither
+# has been supported for several years.
+#
+
+set -o nounset
+set -o pipefail
+
+
+usage()
+{
+    cat <<EOF
+
+Manta SV caller build configuration
+
+Usage: $0 [options]
+
+Options: [defaults in brackets after descriptions]
+
+Configuration:
+  --help                  print this message
+  --verbose               display more information (enables CMAKE_VERBOSE_MAKEFILE)
+  --jobs=N                build cmake and boost in N parallel jobs if needed [$parallel_jobs]
+  --with-cmake=CMAKE      specify the cmake executable [cmake]
+  --with-eclipse          create the eclipse project files
+  --with-version-control  create the eclipse project in the source tree to
+                          allow version control within eclipse
+  --build-type=TYPE       specify the build type for CMake (affects compiler
+                          options). Allowed values are "Debug", "Release",
+                          "RelWithDebInfo", "ASan" [$build_type]
+                            Debug: No optimization and all debug symbols
+                            Release: All portable optimization
+                            RelWithDebInfo: Most optimizations, try to keep stack trace info
+                            ASan: Light optimization with google addresss sanitizer on
+
+Directory and file names:
+  --prefix=PREFIX         install files in tree rooted at PREFIX
+                          [$prefix_dir]
+
+Some influential environment variables:
+  BOOST_ROOT       root location of the boost library and headers
+  CC               C compiler command
+  CXX              C++ compiler command
+
+Use these variables to override the choices made by 'configure' or to help
+it to find libraries and programs with nonstandard names/locations. Typically
+CC and CXX must be provided together to refer to the c and c++ front-ends of
+the same compiler to ensure a successful build.
+
+EOF
+    exit 2
+
+#  CXXFLAGS         C++ compiler flags
+#  LDFLAGS          linker flags, e.g. -L<lib dir> if you have libraries in a
+#                   nonstandard directory <lib dir>
+#  CPPFLAGS         C/C++ preprocessor flags, e.g. -I<include dir> if you have
+#                   headers in a nonstandard directory <include dir>
+#  CMAKE_OPTIONS    CMake command line options
+}
+
+
+#
+# utilities:
+#
+clog ()
+{
+    echo $@ 1>&2
+}
+
+# rel to absolute path. works for existing paths only
+rel2abs ()
+{
+    (cd "$1" && pwd -P)
+}
+
+
+# Helper function to fix windows paths.
+fix_slashes ()
+{
+    echo "$1" | sed 's/\\/\//g'
+}
+
+
+create_path ()
+{
+    mkdir -p "$1" || exit 1
+    rel2abs "$1" || exit 1
+}
+
+
+
+#
+# Detect system and directory information.
+#
+system="`uname`"
+processor="`uname -p`"
+arch="`uname -a`"
+root_dir="`echo $0 | sed -n '/\//{s/\/[^\/]*$//;p;}'`"
+root_dir="$(rel2abs $root_dir)"
+redist_dir="${root_dir}/redist"
+bootstrap_dir="${root_dir}/src/cmake/bootstrap"
+build_dir="$(pwd -P)"
+
+
+# Determine whether this is a MinGW environment.
+system_mingw=false
+if echo "${system}" | grep MINGW >/dev/null 2>&1; then
+    system_mingw=true
+fi
+
+# Determine whether this is OS X
+system_darwin=false
+if echo "${system}" | grep Darwin >/dev/null 2>&1; then
+    system_darwin=true
+fi
+
+# Choose the default install prefix.
+get_default_prefix() {
+  if ${system_mingw}; then
+    if [ "x${PROGRAMFILES}" != "x" ]; then
+        echo `fix_slashes "${PROGRAMFILES}/CMake"`
+    elif [ "x${ProgramFiles}" != "x" ]; then
+        echo `fix_slashes "${ProgramFiles}/CMake"`
+    elif [ "x${SYSTEMDRIVE}" != "x" ]; then
+        echo `fix_slashes "${SYSTEMDRIVE}/Program Files/CMake"`
+    elif [ "x${SystemDrive}" != "x" ]; then
+        echo `fix_slashes "${SystemDrive}/Program Files/CMake"`
+    else
+        echo "c:/Program Files/CMake"
+    fi
+  else
+    echo "/usr/local"
+  fi
+}
+
+
+#
+# defaults:
+#
+prefix_dir=$(get_default_prefix)
+build_type=Release
+cmake=
+parallel_jobs=1
+cmake_generator="Unix Makefiles"
+is_verbose=false
+if [ -z "${CMAKE_OPTIONS+xxx}" ]; then CMAKE_OPTIONS=""; fi
+
+# Parse arguments
+for a in "$@"; do
+    if   echo $a | grep "^--prefix=" > /dev/null 2> /dev/null; then
+        prefix_dir=`echo $a | sed "s/^--prefix=//"`
+        prefix_dir=`fix_slashes "${prefix_dir}"`
+    elif echo $a | grep "^--help" > /dev/null 2> /dev/null; then
+        usage
+    elif echo $a | grep "^-h" > /dev/null 2> /dev/null; then
+        usage
+    elif echo $a | grep "^--with-cmake=" > /dev/null 2> /dev/null; then
+        cmake=`echo $a | sed "s/^--with-cmake=//"`
+        cmake=`fix_slashes "${cmake}"`
+    elif echo $a | grep "^--build-type=" > /dev/null 2> /dev/null; then
+        build_type=`echo $a | sed "s/^--build-type=//"`
+    elif echo $a | grep "^--with-eclipse" > /dev/null 2> /dev/null; then
+        cmake_generator="Eclipse CDT4 - Unix Makefiles"
+    elif echo $a | grep "^--with-version-control" > /dev/null 2> /dev/null; then
+        CMAKE_OPTIONS="$CMAKE_OPTIONS -DECLIPSE_CDT4_GENERATE_SOURCE_PROJECT=TRUE"
+    elif echo $a | grep "^--verbose" > /dev/null 2> /dev/null; then
+        is_verbose=true
+    elif echo $a | grep "^--jobs=" > /dev/null 2> /dev/null; then
+        parallel_jobs=`echo $a | sed "s/^--jobs=//"`
+    else
+        clog "ERROR: unknown argument: $a"
+        exit 2
+    fi
+done
+
+
+#
+# prevent in-source builds (but allow usage to be triggered first)
+#
+if [ "$root_dir" == "$build_dir" ]; then
+    cat <<EOF 1>&2
+
+ERROR: This project cannot be built in the source directory. Please run
+       configuration in a separate directory. Example:
+
+    """
+    mkdir ../build && cd ../build
+    \${MANTA_ROOT_PATH}/configure [configure_options]
+    """
+
+EOF
+    exit 1
+fi
+
+#
+# setup cmake options
+#
+CMAKE_OPTIONS="$CMAKE_OPTIONS -DCMAKE_BUILD_TYPE:STRING=${build_type}"
+CMAKE_OPTIONS="$CMAKE_OPTIONS -DCMAKE_PARALLEL:STRING=${parallel_jobs}"
+
+if $is_verbose; then
+    CMAKE_OPTIONS="$CMAKE_OPTIONS -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DBoost_DEBUG:BOOL=ON"
+fi
+
+
+if [ "x${prefix_dir}" != "x" ]; then prefix_dir=$(create_path "${prefix_dir}") || exit 1; fi
+
+CMAKE_OPTIONS="-DCMAKE_INSTALL_PREFIX:PATH=\"$prefix_dir\" $CMAKE_OPTIONS"
+
+
+
+# create the build directory if necessary
+if ! [ -d "${build_dir}" ]; then
+    mkdir "${build_dir}"
+    if [ "$?" != 0 ]; then
+        clog "ERROR: Couldn't create the build directory: ${build_dir}"
+        exit 1
+    fi
+fi
+
+
+
+#
+# install cmake if required:
+#
+cmake_install_dir="${build_dir}/bootstrap/cmake"
+if [ "x${cmake}" == "x" ] ; then
+    cmake=$(bash ${bootstrap_dir}/installCmake.bash ${redist_dir} ${cmake_install_dir} ${parallel_jobs})
+
+    if [ "$?" != "0" ]; then
+        clog "ERROR: Failed to verify or install cmake"
+        exit 1
+    fi
+
+    bootstrapped_cmake="${cmake_install_dir}/bin/cmake"
+    if [ $cmake == $bootstrapped_cmake ]; then
+        echo "Using installed cmake: $cmake"
+    else
+        echo "Using existing cmake: $cmake"
+    fi
+fi
+
+
+# display information if required
+if $is_verbose; then
+    cat<<EOF
+Source  directory: ${root_dir}
+Prefix  directory: ${prefix_dir}
+Build   directory: ${build_dir}
+Cmake  executable: ${cmake}
+
+EOF
+fi
+
+
+
+#
+# finally, invoke cmake
+#
+cmake_command="${cmake} -H\"${root_dir}\" -B\"${build_dir}\" -G\"${cmake_generator}\" ${CMAKE_OPTIONS}"
+
+if $is_verbose ; then
+    cat<<EOF
+
+Running on: $arch
+Configuring the build directory with:
+    $cmake_command
+EOF
+fi
+
+eval $cmake_command
+
+
+if [ "$?" != 0 ]; then
+    cat<<EOF 1>&2
+Couldn't configure the project:
+
+$cmake_command
+
+Moving CMakeCache.txt to CMakeCache.txt.removed
+
+EOF
+    if [ -f ${build_dir}/CMakeCache.txt ]; then
+        rm -f ${build_dir}/CMakeCache.txt.removed && mv ${build_dir}/CMakeCache.txt ${build_dir}/CMakeCache.txt.removed
+    fi
+    exit 1
+fi
+
+
+cat<<EOF
+
+The build directory ${build_dir} was configured successfully
+
+Type "make -C ${build_dir}" to build
+
+EOF
+
+
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..1f5c80b
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,8 @@
+# Manta Documentation
+
+This directory aggregates all project documentation. The documentation is divided into the following sections:
+
+  * [User Guide](userGuide/README.md) - This is the primary documentation resource for all Manta users.
+  * [Developer Guide](developerGuide/README.md) - This provides guidelines for anyone contributing to the Manta source.
+  * [Methods](methods/README.md) - This directory aggregates detailed method and model descriptions.
+
diff --git a/docs/developerGuide/ID.md b/docs/developerGuide/ID.md
new file mode 100644
index 0000000..3e6d6c0
--- /dev/null
+++ b/docs/developerGuide/ID.md
@@ -0,0 +1,32 @@
+# Manta Developer Guide - VCF ID field
+
+[Developer Guide Home](README.md)
+
+Manta includes an identifier (VCF `ID` field) for every VCF record in its output. This ID is guaranteed to be unique within any VCF output by manta, as required by the VCF spec.
+
+The `ID` field provides information linking the variant call to the SV association graph. It can be useful for certain debugging procedures.
+
+The identifier has two minor variants, one used for translocation breakends and one variant used for all other VCF records. An example ID for a translocation record is:
+
+```
+MantaBND:5862:0:1:0:0:0:1
+```
+
+An example for other record types is:
+
+```
+MantaDEL:47029:3:9:0:0:0
+```
+
+We describe the first example ID by describing each sub-field when the ID is split on the colon character. Note the [breakend graph description](breakendGraph.md) may be helpful for reference here.
+
+index | ID component name | Value from above example | Description
+----- | ----------------- | ------------------------ | -----------
+1 | Label | MantaBND | Simple text label appending "Manta" to the SVType. The ID is still unique if this component is removed.
+2 | LocusID | 5862 | Index of the SV breakend graph **locus**. Each locus is a disjoint subgraph of the full breakend graph.
+3 | Node1ID | 0 | Index of the first SV breakend graph **node** forming the graph edge used to discover this variant.
+4 | Node2ID | 1 | Index of the second SV breakend graph **node** forming the graph edge used to discover this variant. If Node1ID==Node2ID this is a self-edge.
+5 | CandidateID | 0 | Each graph edge is analyzed for evidence of specific SV or indel candidates. This index provides the index of the source candidate among all candidates associated with this edge.
+6 | AssemblyID | 0 | For each candidate multiple contigs/paths are extracted from the assembly graph, this describes the index of the path used to generate this candidate or "0" for an IMPRECISE variant
+7 | SegmentID | 0 | Multiple small variants can be extracted from each assembly contig/path alignment; this describes the alignment segment index used to produce the candidate variant. This index can only be non-zero for small indels.
+8 | BNDID | 1 | This sub-field is only used in BND records. BND records are different than other VCF record types in that one variant is represented by two breakends. This component is set to either 0 or 1 to indicate the breakend number of the variant.
diff --git a/docs/developerGuide/README.md b/docs/developerGuide/README.md
new file mode 100644
index 0000000..c710925
--- /dev/null
+++ b/docs/developerGuide/README.md
@@ -0,0 +1,231 @@
+Manta Developer Guide
+=====================
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Scope](#scope)
+* [Developer Build Notes](#developer-build-notes)
+  * [Building from source repository vs. versioned code distribution:](#building-from-source-repository-vs-versioned-code-distribution)
+  * [Source auto-documentation](#source-auto-documentation)
+  * [Improving build time](#improving-build-time)
+    * [ccache](#ccache)
+    * [Bundled dependencies](#bundled-dependencies)
+  * [General Debugging: Address Sanitizer](#general-debugging-address-sanitizer)
+  * [General Debugging: Inspecting temporary files](#general-debugging-inspecting-temporary-files)
+  * [Windows development support](#windows-development-support)
+  * [Automating Portable Binary Builds](#automating-portable-binary-builds)
+* [Coding Guidelines](#coding-guidelines)
+  * [Source formatting](#source-formatting)
+  * [Error handling](#error-handling)
+    * [General Policies](#general-policies)
+    * [Exception Details](#exception-details)
+    * [Logging](#logging)
+  * [Unit tests](#unit-tests)
+* [Special Topic Guides](#special-topic-guides)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+## Scope
+
+This guide provides:
+* protocols for contributing new or modified methods
+* methods to debug stability or runtime issues
+* methods to debug suspected false or missing variant calls
+* high-level architectural documentation
+
+Information is added as pertinent questions/discussions come up in the contributor community,
+so this guide is not intended to provide complete coverage of the above topics.
+
+For end user documentation describing how to run an analysis and interpret its output,
+please see the [User Guide](../userGuide/README.md).
+
+## Developer Build Notes
+
+The following section provides a supplement to the standard build
+instructions including additional details of interest to methods
+developers.
+
+### Building from source repository vs. versioned code distribution:
+
+When the source repository is cloned from github, it is configured for development
+rather than user distribution. In this configuration all builds are strict
+such that:
+* all warnings are treated as errors
+* if cppcheck is found, any detected cppcheck issue is converted to a build error
+
+Note that all unit tests are always run and required to pass for the build
+procedure to complete.
+
+### Source auto-documentation
+
+If doxygen is found in the path (and optionally dot as well) during
+build configuration, then c++ documentation is available as an
+additional "doc" target for the makefile:
+
+    make doc
+
+There is no installation for the documentation outside of the build
+directory, the root doxygen page after completing this target will be:
+
+    ${MANTA_BUILD_PATH}/c++/doxygen/html/index.html
+
+### Improving build time
+
+#### ccache
+
+The build system is configured to use ccache whenever this is
+found in the path.
+
+#### Bundled dependencies
+
+Note that during the configuration step, the following dependencies will be
+built from source if they are not found:
+
+* cmake 2.8.0+
+* boost 1.56.0+
+
+To avoid the extra time associated with this step, ensure that (1)
+cmake 2.8.0+ is in your PATH and (2) BOOST\_ROOT is defined to point
+to boost 1.56.0 or newer.
+
+### General Debugging: Address Sanitizer
+
+The build system offers first-class support for google address sanitizer
+when a supporting compiler is detected. To use this mode, start a fresh
+installation process with the additional configure option `--build-type=ASan`,
+extending from the configuration example in the above build instructions, use:
+
+    ../manta-A.B.C.release_src/src/configure --jobs=4 --prefix=/path/to/install --build-type=ASan
+
+### General Debugging: Inspecting temporary files
+
+Manta's configuration step includes an extended option to keep all temporary
+files which would normally be deleted by the workflow as it runs. Keeping these
+files may be helpful in various debugging scenarios. To turn on this option, add
+`--retainTempFiles` as a configuration argument:
+
+    configManta.py [other_options...] --retainTempFiles
+
+### Windows development support
+
+Manta does not link or run on windows. However, the build system does
+facilitate Visual Studio (VS) users. When cmake configuration is run
+on windows, all linking is disabled and most third party libraries are
+unpacked for header include access, but are not compiled. Cmake VS
+solutions allow the c++ code to be browsed, analyzed and compiled to
+the library level.  Note that unit test codes are compiled to
+libraries but cannot be run.
+
+C++11 features in use require at least VS2013. A Windows
+installation of cmake is also required to configure and compile.
+Note that the minimum cmake version is 3.1.0 for Windows.
+
+### Automating Portable Binary Builds
+
+A script is provided to enable a dockerized build process which
+issues Centos5+ or Centos6+ binary tarballs. To do so, ensure you
+have permission to `docker run` on the current system and execute the
+following script:
+
+```
+${MANTA_ROOT_PATH}/scratch/docker/deployment/dockerBuildBinaryTarball.bash ${MANTA_ROOT_PATH2} ${BINARY_BUILD_PREFIX}
+```
+
+The term `${MANTA_ROOT_PATH2}` can point to the current git repo (i.e. `${MANTA_ROOT_PATH}`),
+or to an extracted Manta source tarball previously created using the script:
+
+```
+${MANTA_ROOT_PATH}/scratch/make_release_tarball.bash
+```
+
+The choice of virtualized build environment is hard-coded in the deploy script for the time being,
+see the `builderImage` variable.
+
+## Coding Guidelines
+
+### Source formatting
+
+* Basic formatting restrictions on c++ code:
+  * spaces instead of tabs
+  * 4-space indents
+  * "ANSI" bracket style
+* Note the above restrictions are enforced by an astyle script which is occasionally run on the master branch (see [run_cxx_formatter.bash](../../scratch/source_check_and_format/run_cxx_formatter.bash))
+* Otherwise, follow local code conventions
+
+### Error handling
+
+#### General Policies
+
+* Exceptions with informative contextual details are encouraged whenever possible.
+* To quickly express invariants it is acceptable to add `assert()`'s first, and transition to exceptions as code stabilizes.
+* Note that the build process will never define `NDEBUG` to compile out assert statements, even in release code.
+* Exceptions are never thrown with the intent to recover -- this is not a web browser. The goal is to:
+  * Fail at the first sign of trouble.
+  * Provide as much helpful contextual information as possible, including context from multiple layers of the stack.
+* Warnings are discouraged. If considering a warning you should probably just fail per the above policy.
+
+#### Exception Details
+
+* Preferred exception pattern is to use an internal class derived from `boost::exception`:
+
+```c++
+
+#include "common/Exceptions.hh"
+
+#include <sstream>
+
+void
+foo(const char* name)
+{
+    using namespace illumina::common;
+
+    std::ostringstream oss;
+    oss << "ERROR: unrecognized variant scoring model name: '" << name << "'\n";
+    BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+}
+```
+
+* Context at the original throw site is often supplemented by a 'catch and release' block to add
+information at a few critical points on the stack. Typically this is information which
+is unavailable at the throw site. Example code is:
+
+```c++
+try
+{
+    realign_and_score_read(_opt,_dopt,sif.sample_opt,_ref,realign_buffer_range,rseg,sif.indel_sync());
+}
+catch (...)
+{
+    log_os << "ERROR: Exception caught in align_pos() while realigning segment: "
+	   << static_cast<int>(r.second) << " of read: " << (*r.first) << "\n";
+    throw;
+}
+```
+
+#### Logging
+
+* At the workflow (python) layer, please write all logging messages through pyflow's logging interface as follows:
+```python
+self.flowLog("Initiating Starling workflow version: %s" % (__version__))
+```
+
+* At the binary (c++) layer, there is no logger at present. Direct all error messaging to `std::cerr`.
+
+### Unit tests
+
+* Unit tests are enabled for a subset of the c++ code
+* All tests use the boost unit test framework
+* All unit tests are required to run and pass as part of every build (including end-user builds)
+* Unit tests are already enabled for every library "test" subdirectory, additional tests in these directories will be automatically detected
+  * Example [svgraph unit tests directory](../../src/c++/lib/svgraph/test)
+
+## Special Topic Guides
+
+The following items provide more in-depth details on a subsection of the methods/debugging protocol, etc.
+
+* [Alignment Library](alignment.md)
+* [Breakend Graph Queries](breakendGraph.md)
+* [Debug Single SV](debugSingleSV.md)
+* [Debug Full Manta Run](debugFullRun.md)
+* [Manta VCF ID field](ID.md)
+* [Test assembler directly from BAM input](testAssembler.md)
+
diff --git a/docs/developerGuide/alignment.md b/docs/developerGuide/alignment.md
new file mode 100644
index 0000000..a983920
--- /dev/null
+++ b/docs/developerGuide/alignment.md
@@ -0,0 +1,43 @@
+# Manta Developer Guide - Alignment Library
+
+[Developer Guide Home](README.md)
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Introduction](#introduction)
+* [Debugging](#debugging)
+* [Running single alignment unit tests](#running-single-alignment-unit-tests)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+## Introduction
+
+This is a subsection of the manta developer guide focusing on the alignment library.
+
+## Debugging
+
+All alignment methods support debugging via the `DEBUG_ALN` symbol. When defined, the score matrix and backtrace pointer for each element in the
+alignment structure are streamed to stderr, in addition to the backtrace sequence. Due to the volume of output, this can be
+most usefully applied to a single, relatively small alignment unit test.
+
+For even richer debugging `DEBUG_ALN_MATRIX` can be defined. In this case the entire score matrix is saved (this is normally discarded
+as soon as each element row is no longer required), and the score+backpointer matrix for each state is printed to stderr. This is only
+effective for very small unit test cases.
+
+## Running single alignment unit tests
+
+Note that unit tests can be run outside of the manta build system, and additionally reduced to run a single test suite or single test. This
+can be usefully combined with the rich debug output options above. Note that this test selection interface is a standard defined by
+the boost unit test library.
+
+To run unit tests directly, first go through the standard build procedure. Then from the build directory, the alignment unit tests can be run
+by executing:
+
+    src/c++/lib/alignment/test/manta_unit_test_alignment
+
+You can run the tests for a single component only (e.g. the intron aligner) using the `-t` option as follows:
+
+    src/c++/lib/alignment/test/manta_unit_test_alignment -t test_GlobalJumpIntronAligner
+
+...or select a single test out of this test suite:
+
+    src/c++/lib/alignment/test/manta_unit_test_alignment -t test_GlobalJumpIntronAligner/test_GlobalJumpAlignerSplice
diff --git a/docs/developerGuide/breakendGraph.md b/docs/developerGuide/breakendGraph.md
new file mode 100644
index 0000000..f34bbd2
--- /dev/null
+++ b/docs/developerGuide/breakendGraph.md
@@ -0,0 +1,32 @@
+# Manta Developer Guide - Breakend Graph
+
+[Developer Guide Home](README.md)
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Summary](#summary)
+* [Querying the graph](#querying-the-graph)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+## Summary
+
+The breakend graph is a central intermediate data structure in the manta workflow. It contains the following information:
+* "Nodes" – these are contiguous regions of the genome. These are associated with one or more breakends.
+  * Evidence range: Each node has an additional chromosomal range where its read evidence was originally aligned (if the node is built from indirect evidence only, then the evidence range should be the same as the node range).
+  * Edges between nodes – Every node with an edge has a return pointer in a properly formatted SV graph, but the evidence count on an out-edge represents that evidence for the edge was observed at the node the edge is coming from.
+* "Loci" – A locus is a disjoint subgraph
+
+## Querying the graph
+The graph can be queried as follows:
+
+* Given a genomic region, write out all nodes which intersect that region
+* List a specific locus id (such as one identified when querying a region)
+* List the whole graph
+
+Example:
+
+    ${MANTA_INSTALL_ROOT}/libexec/DumpSVLoci --graph-file foo --region chr10:1000000-1001000
+
+You can also get summary metrics from the graph:
+* List of node count, edge count, observation count, etc. for every locus
+* Summary of total reads used and total reads cleaned out as noise edges.
diff --git a/docs/developerGuide/debugFullRun.md b/docs/developerGuide/debugFullRun.md
new file mode 100644
index 0000000..2330cc7
--- /dev/null
+++ b/docs/developerGuide/debugFullRun.md
@@ -0,0 +1,46 @@
+# Manta Developer Guide - Debugging a full Manta run
+
+[Developer Guide Home](README.md)
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Summary](#summary)
+* [Rerun SVCandidateGeneration without recreating the SVLocus graph](#rerun-svcandidategeneration-without-recreating-the-svlocus-graph)
+* [Comparing VCF output between runs](#comparing-vcf-output-between-runs)
+* [Options to accelerate a small test case](#options-to-accelerate-a-small-test-case)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+## Summary
+
+This page describes debug/analysis capabilities which are especially useful to full Manta runs -- for instance for scenarios when iterating on a general improvement to the methods. When debugging a single SV, see the related page on [Debugging a single SV in Manta](debugSingleSV.md)
+
+## Rerun SVCandidateGeneration without recreating the SVLocus graph
+
+This is useful if you're working on a component of candidate generation/scoring which doesn't impact graph creation, and frequently rerunning a test. To use this option, provide the `--rescore` option to runWorkflow.py. When this is provided, candidate generation and scoring will always be re-run, but the graph will only be created if it doesn't already exist. Example:
+
+    ${RUN_DIR}/runWorkflow.py -m sge -j 24 --rescore
+
+
+## Comparing VCF output between runs
+To assist in evaluating the quality of predictions from a full run compared to a stable benchmark (master, etc), there is a manta utility script to suppress some of the noise expected from a simple 'diff' of two manta vcfs. It takes two vcf files as arguments. These can be gzipped or uncompressed. A usage example is:
+
+    ${MANTA_GIT_CLONE_DIR}/scratch/util/compareMantaVcfs.bash ../m67_test/m67_12_redo_control/results/variants/diploidSV.vcf.gz m63/results/variants/diploidSV.vcf.gz | grep -v MaxDepth | less
+
+
+## Options to accelerate a small test case
+
+If not running an analysis on a single small genome segment (see [Debugging a single SV in Manta](debugSingleSV.md)), there are various options to make small bam subsegments run a bit faster:
+
+At config time you can reduce/increase the total number of tasks by making each job do more/less :
+
+```
+    --scanSizeMb=scanSizeMb
+                        Maximum sequence region size (in Mb) scanned by each
+                        task during SV locus graph generation. (default: 12)
+
+    --candidateBins=candidateBins
+                        Provide the total number of tasks which candidate
+                        generation  will be sub-divided into. (default: 256)
+```
+
+At run time you can shut down stderr logging. This log is replicated to $runDir/workflow/pyflow.data/logs/pyflow_log.txt, so there is no loss of information. To do so, provide 'runWorkflow.py' with the "--quiet" option.
diff --git a/docs/developerGuide/debugSingleSV.md b/docs/developerGuide/debugSingleSV.md
new file mode 100644
index 0000000..f0d4a3a
--- /dev/null
+++ b/docs/developerGuide/debugSingleSV.md
@@ -0,0 +1,123 @@
+# Manta Developer Guide - Debugging a single SV in Manta
+
+[Developer Guide Home](README.md)
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Summary](#summary)
+* [Scenario 0: Debug SV call in both graph creation and SV candidate generation steps](#scenario-0-debug-sv-call-in-both-graph-creation-and-sv-candidate-generation-steps)
+* [Scenario 1 : Debug gold-standard SV call which is already covered by an edge in the SVLocus graph](#scenario-1--debug-gold-standard-sv-call-which-is-already-covered-by-an-edge-in-the-svlocus-graph)
+  * [Step 1: Identify the graph edge corresponding to the SV](#step-1-identify-the-graph-edge-corresponding-to-the-sv)
+    * [S1 - Step 1A : Query Node in the SV Locus graph](#s1---step-1a--query-node-in-the-sv-locus-graph)
+    * [S1 - Step 1B : Query Node in the SV Locus graph](#s1---step-1b--query-node-in-the-sv-locus-graph)
+    * [S1 - Step 1C : Query Locus in the SV Locus graph](#s1---step-1c--query-locus-in-the-sv-locus-graph)
+    * [S1 - Step 2 : Run candidate generation on specific SV locus or specific SV locus edge](#s1---step-2--run-candidate-generation-on-specific-sv-locus-or-specific-sv-locus-edge)
+* [Debugging Infrastructure TODO](#debugging-infrastructure-todo)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+## Summary
+
+Manta tries to reduce intermediate I/O as much as possible, which is helpful in production, but not so for development/debug. As debugging cases come up, specialized debug/localized running modes have been added to certain Manta tools, and more will certainly be added. This page documents the workflows that exist – especially those that facilitate debugging a single known FN or FP case. We also note workflows which would be useful and should be added in the future. For related debug disc [...]
+
+## Scenario 0: Debug SV call in both graph creation and SV candidate generation steps
+
+The Manta workflow has the ability to run one to many sub-segments of the genome, which can accelerate debugging by setting the region(s) to cover the breakends of any SVs of interest.
+
+To enable a regional build, simply specify the additional flag "--region" at configuration-time and provide a region in samtools format. This region flag can be repeated multiple times to (for instance) cover both breakend regions of a translocation. Documentation for this and other hidden debug/development options appears when you provide the '--allHelp' flag as follows:
+
+    ${MANTA_INSTALL_ROOT}/bin/configManta.py --allHelp
+
+For a small input region it would typically be desirable to reduce the number of GenerateSVCandidate worker tasks to further accelerate the debug build by avoiding additional sparse task overhead. The number of SVCandidate generation tasks can be changed with the '--candidateBins' flag. A full example of a rapid debug region run is:
+
+    ${MANTA_INSTALL_ROOT}/bin/configManta.py --normalBam myBam.bam --region chr2:19000-20000 --candidateBins 1
+
+An example debug build for a translocation is:
+
+    ${MANTA_INSTALL_ROOT}/bin/configManta.py --normalBam myBam.bam --tumorBam myTumorBam.bam --region chr2:19000-20000 --region chr20:1000-2000 --candidateBins 4
+
+## Scenario 1 : Debug gold-standard SV call which is already covered by an edge in the SVLocus graph
+
+If a known clean SV is either missing from the output, or we would like to repeatedly/quickly run just this one SV during development of a new feature, there is limited support to run a specific SV (actually a specific disjoint subgraph of the SVgraph – often for gold-standard calls, the call is the only member of a disjoint sub-graph).
+
+### Step 1: Identify the graph edge corresponding to the SV
+
+Every edge in the breakend graph has a locus index (each locus is a connected sub-graph), a node index (nodes are numbered from 0 within each locus) and a second node index (equal to the first node index in the case of a self-edge). It is possible that there is no edge corresponding to your SV, in which case the FN problem extends all the way back to graph candidate generation. You can determine if this is the case in Step 1B below.
+
+The following subsections of Step 1 describe how to extract these graph indices for an SV which is already being printed out at least to the candidate vcf file (Step 1A – easier case), or for the general case (Step 1B, a bit more involved).
+
+#### S1 - Step 1A : Query Node in the SV Locus graph
+
+The following record from a Manta candidate VCF shows the locus-index<sup>**A**</sup>, node1-index<sup>**B**</sup> and node2-index<sup>**C**</sup> , per the corresponding superscripts applied to each field:
+
+> chrX    70129275        MantaINV:572680<sup>**A**</sup>:1<sup>**B**</sup>:3<sup>**C**</sup>:0:0 A       <INV>   .       .       END=70140033;SVTYPE=INV;SVLEN=-10758;UPSTREAM_PAIR_COUNT=29;DOWNSTREAM_PAIR_COUNT=29;PAIR_COUNT=29;CIPOS=0,7;CIEND=-7,0;HOMLEN=7;HOMSEQ=ACATGGA
+
+If these are available for your SV of interest, then you can skip to Step 2 below.
+
+#### S1 - Step 1B : Query Node in the SV Locus graph
+
+Hypothesis generation starts from a graph of connected genomic regions called the SV locus graph. After manta has been run, a binary file containing the graph can be found in:
+
+    ${MANTA_RUN_DIR}/workspace/svLocusGraph.bin
+
+The most useful way to examine the content of the graph is by dumping all regions/edges which overlap a specific genomic region, for example:
+
+    ${MANTA_INSTALL_ROOT}/libexec/DumpSVLoci --graph-file svLocusGraph.bin --region chr2:2000000-2000100
+
+Making such a query over the region where either of a gold-standard SV's breakends are expected should reveal if the region was even detected as SV associated.
+
+For example, to check for the first breakend of COSMIC deletion COST17172, the following query can be run on the HCC1187 graph file:
+
+    ${MANTA_INSTALL_ROOT}/libexec/DumpSVLoci --graph-file svLocusGraph.bin --region chrX:154205937-154205937
+
+...which yields:
+
+```
+SVNode LocusIndex:NodeIndex : 13716:0
+LocusNode: count: 50 GenomeInterval: 23:[154205488,154206145) n_edges: 1 out_count: 50 evidence: [154205542,154205948)
+        EdgeTo: 1 out_count: 50
+```
+
+This shows that the locus index is **13716**
+
+#### S1 - Step 1C : Query Locus in the SV Locus graph
+
+In the output above (end of Step 1B), we gain access to the locus index number of the disjoint subgraph which covers (at least) the COST17172 breakends (this locus index is 13716, from the first line of the output above). Using this index number we can query the graph file for this disjoint subgraph to see all components connected to the initial region:
+
+    ${MANTA_INSTALL_ROOT}/libexec/DumpSVLoci --graph-file svLocusGraph.bin --locus-index 13716
+
+Result:
+
+```
+LOCUS BEGIN INDEX 13716
+NodeIndex: 0 LocusNode: count: 50 GenomeInterval: 23:[154205488,154206145) n_edges: 1 out_count: 50 in_count: 55 evidence: [154205542,154205948)
+        EdgeTo: 1 out_count: 50 in_count: 55
+NodeIndex: 1 LocusNode: count: 55 GenomeInterval: 23:[154226371,154226808) n_edges: 1 out_count: 55 in_count: 50 evidence: [154226586,154226908)
+        EdgeTo: 0 out_count: 55 in_count: 50
+LOCUS END INDEX 13716
+```
+
+This shows us that the deletion is supported by one edge in the SVLocus graph. We can also see that the edge connects node index 0 to node index 1. Together with the locus index, we have all graph indices required to run only the edge corresponding to our SV of interest.
+
+
+#### S1 - Step 2 : Run candidate generation on specific SV locus or specific SV locus edge
+
+We can also run Candidate SV generation for an entire locus, all edges which connect to one node in a locus or only one edge in a locus. To do this we extract the GenerateSVCandidates command from a manta run and add the `--locus-index ARG` flag. ARG can be:
+
+option | example | description
+------ | ------- | -----------
+--locus-index LocusIndex | --locus-index 13716 | Run all edges in the specified locus only
+--locus-index LocusIndex:NodeIndex | --locus-index 13716:0 | Run all edges which connect to the specified node in the specified locus
+--locus-index LocusIndex:NodeIndex1:NodeIndex2 | --locus-index 13716:0:1 | Run the single edge connecting the two specified nodes (or the self-edge if NodeIndex1==NodeIndex2)
+
+An example full command-line is:
+
+> ${MANTA_INSTALL_ROOT}/libexec/GenerateSVCandidates --align-stats /tmp/manta_test/assemble_test/testAssm3/workspace/alignmentStats.xml --graph-file /tmp/manta_test/assemble_test/testAssm3/workspace/svLocusGraph.bin --ref genome.fa --candidate-output-file /tmp/manta_test/assemble_test/testAssm3/workspace/svHyGen/candidateSV.0103.vcf --somatic-output-file /tmp/manta_test/assemble_test/testAssm3/workspace/svHyGen/somaticSV.0103.vcf --chrom-depth /tmp/manta_test/assemble_test/testAssm3/work [...]
+
+The additional `--locus-index ARG` command is highlighted, together with the new `--verbose` option. In the example, SV generation runs for the specified edge "13716:0:1" only. This makes it easier to run modifications of the SV generator with various types of verbose debugging outputs, etc...  To get started in this direction, the example includes the --verbose option to provide some quick high level logging without recompiling – for many problems more specific/noisy debug output will [...]
+
+## Debugging Infrastructure TODO
+
+The above process could be more streamlined, especially for cases where an SV is part of a large disjoint subgraph. New features:
+* GenerateSVCandidates should accept a --region1 and --region2 argument, and only call SVGraph edges connecting those two regions.
+
+
diff --git a/docs/developerGuide/testAssembler.md b/docs/developerGuide/testAssembler.md
new file mode 100644
index 0000000..e993f0a
--- /dev/null
+++ b/docs/developerGuide/testAssembler.md
@@ -0,0 +1,24 @@
+# Manta Developer Guide - Assembler Test
+
+[Developer Guide Home](README.md)
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Introduction](#introduction)
+* [Operation](#operation)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+## Introduction
+
+Manta's assembler is internally accessed through the assembler's API during SV analysis. For certain debugging scenarios it is helpful to be able to invoke the assembler on a specified set of input reads. The application `TestAssembler` provides such a capability, reading in read input from one or more bam files and producing a set of contigs in fasta format as output.
+
+## Operation
+
+Example command-line
+
+    ${MANTA_INSTALL_ROOT}/libexec/TestAssembler --align-file foo.bam > contigs.fa
+
+Only use this with very small BAM files -- every read in the file is treated as assembler input.
+
+Manta itself has complex selection and orientation logic. In this routine, everything in the bam is selected as input to the assembler. Read orientation is changed only for unmapped reads with mapped read pairs, in which case the unmapped read will be given the opposite strand orientation of its mapped partner.
+
diff --git a/docs/methods/README.md b/docs/methods/README.md
new file mode 100644
index 0000000..23e79c6
--- /dev/null
+++ b/docs/methods/README.md
@@ -0,0 +1,14 @@
+Manta Methods
+=============
+
+This directory contains documents describing Manta's primary methods
+and any major experimental approaches. Methods documentation is
+maintained in latex.
+
+Each directory should contain a `makepdf.bash` script, which produces
+a `methods.pdf` file as output (so constrained to facilitate automated
+testing of the latex source build). The build for each document should
+only require the contents of the standard `texlive` package -- any
+additional latex requirements should be included with the methods if
+possible.
+
diff --git a/docs/methods/primary/figure_data/jumpstate/jumpstate.dot b/docs/methods/primary/figure_data/jumpstate/jumpstate.dot
new file mode 100644
index 0000000..e7bf894
--- /dev/null
+++ b/docs/methods/primary/figure_data/jumpstate/jumpstate.dot
@@ -0,0 +1,24 @@
+
+digraph jump_aligner {
+        rankdir=LR;
+        size="6,3"
+        //node [shape = none, width=0, height=0, label=""] p1;
+        node [shape = doublecircle]; jump
+        node [shape = circle];
+        //LR_8 -> LR_5 [ label = "S(a)" ];
+{rank=min; insert1 -> delete1;}
+{rank=max; insert2 -> delete2;}
+        delete1 -> insert1;
+        insert1 -> match1;
+        delete1 -> match1;
+        match1 -> insert1;
+        match1 -> delete1;
+        match1 -> jump;
+        jump -> match2
+        jump -> insert2
+        match2 -> insert2
+        match2 -> delete2
+        insert2 -> match2
+        delete2 -> match2
+        delete2 -> insert2
+}
diff --git a/docs/methods/primary/figure_data/jumpstate/makeit.bash b/docs/methods/primary/figure_data/jumpstate/makeit.bash
new file mode 100755
index 0000000..bef5dd3
--- /dev/null
+++ b/docs/methods/primary/figure_data/jumpstate/makeit.bash
@@ -0,0 +1,2 @@
+
+dot -Teps jumpstate.dot -o jumpstate.eps
diff --git a/docs/methods/primary/figure_data/workflow/workflow.pptx b/docs/methods/primary/figure_data/workflow/workflow.pptx
new file mode 100644
index 0000000..e83a598
Binary files /dev/null and b/docs/methods/primary/figure_data/workflow/workflow.pptx differ
diff --git a/docs/methods/primary/figures/jumpstate.eps b/docs/methods/primary/figures/jumpstate.eps
new file mode 100644
index 0000000..02930b6
--- /dev/null
+++ b/docs/methods/primary/figures/jumpstate.eps
@@ -0,0 +1,563 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: graphviz version 2.36.0 (20140111.2315)
+%%Title: jump_aligner
+%%Pages: 1
+%%BoundingBox: 36 36 468 186
+%%EndComments
+save
+%%BeginProlog
+/DotDict 200 dict def
+DotDict begin
+
+/setupLatin1 {
+mark
+/EncodingVector 256 array def
+ EncodingVector 0
+
+ISOLatin1Encoding 0 255 getinterval putinterval
+EncodingVector 45 /hyphen put
+
+% Set up ISO Latin 1 character encoding
+/starnetISO {
+        dup dup findfont dup length dict begin
+        { 1 index /FID ne { def }{ pop pop } ifelse
+        } forall
+        /Encoding EncodingVector def
+        currentdict end definefont
+} def
+/Times-Roman starnetISO def
+/Times-Italic starnetISO def
+/Times-Bold starnetISO def
+/Times-BoldItalic starnetISO def
+/Helvetica starnetISO def
+/Helvetica-Oblique starnetISO def
+/Helvetica-Bold starnetISO def
+/Helvetica-BoldOblique starnetISO def
+/Courier starnetISO def
+/Courier-Oblique starnetISO def
+/Courier-Bold starnetISO def
+/Courier-BoldOblique starnetISO def
+cleartomark
+} bind def
+
+%%BeginResource: procset graphviz 0 0
+/coord-font-family /Times-Roman def
+/default-font-family /Times-Roman def
+/coordfont coord-font-family findfont 8 scalefont def
+
+/InvScaleFactor 1.0 def
+/set_scale {
+       dup 1 exch div /InvScaleFactor exch def
+       scale
+} bind def
+
+% styles
+/solid { [] 0 setdash } bind def
+/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def
+/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def
+/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def
+/bold { 2 setlinewidth } bind def
+/filled { } bind def
+/unfilled { } bind def
+/rounded { } bind def
+/diagonals { } bind def
+/tapered { } bind def
+
+% hooks for setting color 
+/nodecolor { sethsbcolor } bind def
+/edgecolor { sethsbcolor } bind def
+/graphcolor { sethsbcolor } bind def
+/nopcolor {pop pop pop} bind def
+
+/beginpage {	% i j npages
+	/npages exch def
+	/j exch def
+	/i exch def
+	/str 10 string def
+	npages 1 gt {
+		gsave
+			coordfont setfont
+			0 0 moveto
+			(\() show i str cvs show (,) show j str cvs show (\)) show
+		grestore
+	} if
+} bind def
+
+/set_font {
+	findfont exch
+	scalefont setfont
+} def
+
+% draw text fitted to its expected width
+/alignedtext {			% width text
+	/text exch def
+	/width exch def
+	gsave
+		width 0 gt {
+			[] 0 setdash
+			text stringwidth pop width exch sub text length div 0 text ashow
+		} if
+	grestore
+} def
+
+/boxprim {				% xcorner ycorner xsize ysize
+		4 2 roll
+		moveto
+		2 copy
+		exch 0 rlineto
+		0 exch rlineto
+		pop neg 0 rlineto
+		closepath
+} bind def
+
+/ellipse_path {
+	/ry exch def
+	/rx exch def
+	/y exch def
+	/x exch def
+	matrix currentmatrix
+	newpath
+	x y translate
+	rx ry scale
+	0 0 1 0 360 arc
+	setmatrix
+} bind def
+
+/endpage { showpage } bind def
+/showpage { } def
+
+/layercolorseq
+	[	% layer color sequence - darkest to lightest
+		[0 0 0]
+		[.2 .8 .8]
+		[.4 .8 .8]
+		[.6 .8 .8]
+		[.8 .8 .8]
+	]
+def
+
+/layerlen layercolorseq length def
+
+/setlayer {/maxlayer exch def /curlayer exch def
+	layercolorseq curlayer 1 sub layerlen mod get
+	aload pop sethsbcolor
+	/nodecolor {nopcolor} def
+	/edgecolor {nopcolor} def
+	/graphcolor {nopcolor} def
+} bind def
+
+/onlayer { curlayer ne {invis} if } def
+
+/onlayers {
+	/myupper exch def
+	/mylower exch def
+	curlayer mylower lt
+	curlayer myupper gt
+	or
+	{invis} if
+} def
+
+/curlayer 0 def
+
+%%EndResource
+%%EndProlog
+%%BeginSetup
+14 default-font-family set_font
+1 setmiterlimit
+% /arrowlength 10 def
+% /arrowwidth 5 def
+
+% make sure pdfmark is harmless for PS-interpreters other than Distiller
+/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse
+% make '<<' and '>>' safe on PS Level 1 devices
+/languagelevel where {pop languagelevel}{1} ifelse
+2 lt {
+    userdict (<<) cvn ([) cvn load put
+    userdict (>>) cvn ([) cvn load put
+} if
+
+%%EndSetup
+setupLatin1
+%%Page: 1 1
+%%PageBoundingBox: 36 36 468 186
+%%PageOrientation: Portrait
+0 0 1 beginpage
+gsave
+36 36 432 150 boxprim clip newpath
+0.840467 0.840467 set_scale 0 rotate 46.8333 46.8333 translate
+% jump
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+253 78 29.47 29.47 ellipse_path stroke
+1 setlinewidth
+0 0 0 nodecolor
+253 78 33.5 33.5 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+238.5 74.3 moveto 29 (jump) alignedtext
+grestore
+% insert2
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+469 46 34.39 34.39 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+450.5 42.3 moveto 37 (insert2) alignedtext
+grestore
+% jump->insert2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 285.76 71.52 moveto
+297.16 69.31 310.13 66.9 322 65 curveto
+356.28 59.5 395.28 54.49 424.34 50.99 curveto
+stroke
+0 0 0 edgecolor
+newpath 425.06 54.43 moveto
+434.58 49.78 lineto
+424.23 47.48 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 425.06 54.43 moveto
+434.58 49.78 lineto
+424.23 47.48 lineto
+closepath stroke
+grestore
+% match2
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+359 111 37.09 37.09 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+338.5 107.3 moveto 41 (match2) alignedtext
+grestore
+% jump->match2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 284.92 87.81 moveto
+293.99 90.69 304.09 93.9 313.82 96.98 curveto
+stroke
+0 0 0 edgecolor
+newpath 312.78 100.32 moveto
+323.37 100.01 lineto
+314.9 93.65 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 312.78 100.32 moveto
+323.37 100.01 lineto
+314.9 93.65 lineto
+closepath stroke
+grestore
+% insert1
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+37 34 34.39 34.39 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+18.5 30.3 moveto 37 (insert1) alignedtext
+grestore
+% delete1
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+37 123 36.29 36.29 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+17 119.3 moveto 40 (delete1) alignedtext
+grestore
+% insert1->delete1
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 58.73 60.77 moveto
+62.41 68.72 63.62 76.67 62.35 84.62 curveto
+stroke
+0 0 0 edgecolor
+newpath 58.96 83.74 moveto
+59.57 94.32 lineto
+65.69 85.66 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 58.96 83.74 moveto
+59.57 94.32 lineto
+65.69 85.66 lineto
+closepath stroke
+grestore
+% match1
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+147 78 37.09 37.09 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+126.5 74.3 moveto 41 (match1) alignedtext
+grestore
+% insert1->match1
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 70.99 41.17 moveto
+81.94 45.11 94.26 50.03 105.7 55.03 curveto
+stroke
+0 0 0 edgecolor
+newpath 104.5 58.33 moveto
+115.06 59.22 lineto
+107.36 51.94 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 104.5 58.33 moveto
+115.06 59.22 lineto
+107.36 51.94 lineto
+closepath stroke
+grestore
+% delete1->insert1
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 14.43 94.32 moveto
+11.15 86.37 10.34 78.42 12.01 70.47 curveto
+stroke
+0 0 0 edgecolor
+newpath 15.4 71.36 moveto
+15.27 60.77 lineto
+8.76 69.13 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 15.4 71.36 moveto
+15.27 60.77 lineto
+8.76 69.13 lineto
+closepath stroke
+grestore
+% delete1->match1
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 68.37 104.21 moveto
+78.5 99.43 90.02 94.45 101.02 90.07 curveto
+stroke
+0 0 0 edgecolor
+newpath 102.45 93.28 moveto
+110.51 86.4 lineto
+99.92 86.75 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 102.45 93.28 moveto
+110.51 86.4 lineto
+99.92 86.75 lineto
+closepath stroke
+grestore
+% delete2
+gsave
+1 setlinewidth
+0 0 0 nodecolor
+469 135 36.29 36.29 ellipse_path stroke
+0 0 0 nodecolor
+14 /Times-Roman set_font
+449 131.3 moveto 40 (delete2) alignedtext
+grestore
+% insert2->delete2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 490.73 72.77 moveto
+494.41 80.72 495.62 88.67 494.35 96.62 curveto
+stroke
+0 0 0 edgecolor
+newpath 490.96 95.74 moveto
+491.57 106.32 lineto
+497.69 97.66 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 490.96 95.74 moveto
+491.57 106.32 lineto
+497.69 97.66 lineto
+closepath stroke
+grestore
+% insert2->match2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 441.99 67.52 moveto
+430.18 75.45 415.9 84.29 402.6 91.98 curveto
+stroke
+0 0 0 edgecolor
+newpath 400.55 89.11 moveto
+393.58 97.09 lineto
+404 95.2 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 400.55 89.11 moveto
+393.58 97.09 lineto
+404 95.2 lineto
+closepath stroke
+grestore
+% delete2->insert2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 446.43 106.32 moveto
+443.15 98.37 442.34 90.42 444.01 82.47 curveto
+stroke
+0 0 0 edgecolor
+newpath 447.4 83.36 moveto
+447.27 72.77 lineto
+440.76 81.13 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 447.4 83.36 moveto
+447.27 72.77 lineto
+440.76 81.13 lineto
+closepath stroke
+grestore
+% delete2->match2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 432.57 133.51 moveto
+423.26 131.82 413.12 129.65 403.44 127.29 curveto
+stroke
+0 0 0 edgecolor
+newpath 404.14 123.85 moveto
+393.58 124.76 lineto
+402.4 130.63 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 404.14 123.85 moveto
+393.58 124.76 lineto
+402.4 130.63 lineto
+closepath stroke
+grestore
+% match1->jump
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 184.08 78 moveto
+192.26 78 201.01 78 209.43 78 curveto
+stroke
+0 0 0 edgecolor
+newpath 209.46 81.5 moveto
+219.46 78 lineto
+209.46 74.5 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 209.46 81.5 moveto
+219.46 78 lineto
+209.46 74.5 lineto
+closepath stroke
+grestore
+% match1->insert1
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 110.64 69.97 moveto
+99.5 65.9 87.16 60.9 75.86 55.9 curveto
+stroke
+0 0 0 edgecolor
+newpath 77.21 52.67 moveto
+66.66 51.72 lineto
+74.31 59.04 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 77.21 52.67 moveto
+66.66 51.72 lineto
+74.31 59.04 lineto
+closepath stroke
+grestore
+% match1->delete1
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 114.95 97.11 moveto
+104.75 101.91 93.21 106.87 82.23 111.22 curveto
+stroke
+0 0 0 edgecolor
+newpath 80.84 108.01 moveto
+72.77 114.87 lineto
+83.36 114.54 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 80.84 108.01 moveto
+72.77 114.87 lineto
+83.36 114.54 lineto
+closepath stroke
+grestore
+% match2->insert2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 388.38 87.9 moveto
+400.46 79.88 414.78 71.09 427.91 63.58 curveto
+stroke
+0 0 0 edgecolor
+newpath 429.77 66.55 moveto
+436.78 58.61 lineto
+426.34 60.45 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 429.77 66.55 moveto
+436.78 58.61 lineto
+426.34 60.45 lineto
+closepath stroke
+grestore
+% match2->delete2
+gsave
+1 setlinewidth
+0 0 0 edgecolor
+newpath 396.26 112.64 moveto
+405.54 114.34 415.6 116.51 425.18 118.86 curveto
+stroke
+0 0 0 edgecolor
+newpath 424.37 122.27 moveto
+434.92 121.37 lineto
+426.11 115.49 lineto
+closepath fill
+1 setlinewidth
+solid
+0 0 0 edgecolor
+newpath 424.37 122.27 moveto
+434.92 121.37 lineto
+426.11 115.49 lineto
+closepath stroke
+grestore
+endpage
+showpage
+grestore
+%%PageTrailer
+%%EndPage: 1
+%%Trailer
+end
+restore
+%%EOF
diff --git a/docs/methods/primary/figures/workflow.eps b/docs/methods/primary/figures/workflow.eps
new file mode 100644
index 0000000..22e19d7
--- /dev/null
+++ b/docs/methods/primary/figures/workflow.eps
@@ -0,0 +1,2251 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: cairo 1.14.1 (http://cairographics.org)
+%%CreationDate: Tue Sep 29 15:21:48 2015
+%%Pages: 1
+%%DocumentData: Clean7Bit
+%%LanguageLevel: 2
+%%BoundingBox: 0 -1 316 283
+%%EndComments
+%%BeginProlog
+save
+50 dict begin
+/q { gsave } bind def
+/Q { grestore } bind def
+/cm { 6 array astore concat } bind def
+/w { setlinewidth } bind def
+/J { setlinecap } bind def
+/j { setlinejoin } bind def
+/M { setmiterlimit } bind def
+/d { setdash } bind def
+/m { moveto } bind def
+/l { lineto } bind def
+/c { curveto } bind def
+/h { closepath } bind def
+/re { exch dup neg 3 1 roll 5 3 roll moveto 0 rlineto
+      0 exch rlineto 0 rlineto closepath } bind def
+/S { stroke } bind def
+/f { fill } bind def
+/f* { eofill } bind def
+/n { newpath } bind def
+/W { clip } bind def
+/W* { eoclip } bind def
+/BT { } bind def
+/ET { } bind def
+/pdfmark where { pop globaldict /?pdfmark /exec load put }
+    { globaldict begin /?pdfmark /pop load def /pdfmark
+    /cleartomark load def end } ifelse
+/BDC { mark 3 1 roll /BDC pdfmark } bind def
+/EMC { mark /EMC pdfmark } bind def
+/cairo_store_point { /cairo_point_y exch def /cairo_point_x exch def } def
+/Tj { show currentpoint cairo_store_point } bind def
+/TJ {
+  {
+    dup
+    type /stringtype eq
+    { show } { -0.001 mul 0 cairo_font_matrix dtransform rmoveto } ifelse
+  } forall
+  currentpoint cairo_store_point
+} bind def
+/cairo_selectfont { cairo_font_matrix aload pop pop pop 0 0 6 array astore
+    cairo_font exch selectfont cairo_point_x cairo_point_y moveto } bind def
+/Tf { pop /cairo_font exch def /cairo_font_matrix where
+      { pop cairo_selectfont } if } bind def
+/Td { matrix translate cairo_font_matrix matrix concatmatrix dup
+      /cairo_font_matrix exch def dup 4 get exch 5 get cairo_store_point
+      /cairo_font where { pop cairo_selectfont } if } bind def
+/Tm { 2 copy 8 2 roll 6 array astore /cairo_font_matrix exch def
+      cairo_store_point /cairo_font where { pop cairo_selectfont } if } bind def
+/g { setgray } bind def
+/rg { setrgbcolor } bind def
+/d1 { setcachedevice } bind def
+%%EndProlog
+%%BeginSetup
+%%BeginResource: font ArialMT
+11 dict begin
+/FontType 42 def
+/FontName /ArialMT def
+/PaintType 0 def
+/FontMatrix [ 1 0 0 1 0 0 ] def
+/FontBBox [ 0 0 0 0 ] def
+/Encoding 256 array def
+0 1 255 { Encoding exch /.notdef put } for
+Encoding 32 /space put
+Encoding 40 /parenleft put
+Encoding 41 /parenright put
+Encoding 45 /hyphen put
+Encoding 63 /question put
+Encoding 65 /A put
+Encoding 66 /B put
+Encoding 67 /C put
+Encoding 68 /D put
+Encoding 69 /E put
+Encoding 70 /F put
+Encoding 71 /G put
+Encoding 73 /I put
+Encoding 77 /M put
+Encoding 80 /P put
+Encoding 83 /S put
+Encoding 84 /T put
+Encoding 86 /V put
+Encoding 97 /a put
+Encoding 98 /b put
+Encoding 99 /c put
+Encoding 100 /d put
+Encoding 101 /e put
+Encoding 102 /f put
+Encoding 103 /g put
+Encoding 104 /h put
+Encoding 105 /i put
+Encoding 107 /k put
+Encoding 108 /l put
+Encoding 109 /m put
+Encoding 110 /n put
+Encoding 111 /o put
+Encoding 112 /p put
+Encoding 114 /r put
+Encoding 115 /s put
+Encoding 116 /t put
+Encoding 117 /u put
+Encoding 118 /v put
+Encoding 121 /y put
+Encoding 122 /z put
+Encoding 146 /quoteright put
+/CharStrings 42 dict dup begin
+/.notdef 0 def
+/C 1 def
+/o 2 def
+/n 3 def
+/s 4 def
+/t 5 def
+/r 6 def
+/u 7 def
+/c 8 def
+/space 9 def
+/b 10 def
+/e 11 def
+/a 12 def
+/k 13 def
+/d 14 def
+/g 15 def
+/p 16 def
+/h 17 def
+/f 18 def
+/m 19 def
+/S 20 def
+/l 21 def
+/A 22 def
+/i 23 def
+/parenleft 24 def
+/B 25 def
+/M 26 def
+/parenright 27 def
+/P 28 def
+/z 29 def
+/G 30 def
+/D 31 def
+/v 32 def
+/V 33 def
+/I 34 def
+/F 35 def
+/E 36 def
+/quoteright 37 def
+/T 38 def
+/y 39 def
+/question 40 def
+/hyphen 41 def
+end readonly def
+/sfnts [
+<00010000000900800003001063767420a11cd7eb00003f2c000006546670676dcc79599a0000
+45800000066e676c796650c738bf0000009c00003e9068656164e43a05f200004bf000000036
+68686561123308f300004c2800000024686d7478b0b8109e00004c4c000000a86c6f63610005
+a06400004cf4000000ac6d617870058505df00004da0000000207072657025d64dbf00004dc0
+00000bbe00020100000005000500000300070000211121112521112101000400fc2003c0fc40
+0500fb002004c000000000010066ffe7057605d3001d00d3b563026a1d0201b8ffe8b40b0b06
+5500b8ffe8405f0b0b06552000320d63007000741d8000841d90009a05ab03a50db903b40dc7
+0dd000e41df31d110e121d111d1d032a0628112a1c201f470d56145715561968056b1d7b128b
+129a03990e9a1ca801a402a811d50e130014001a1014101a0402b8ffdeb2283901b8ffc0402d
+2839100f0001041b131e0c031b1e040910260f4a0026200101011a1f1726200801080c0b0b06
+5508191e635c182b4e10f42b5d4ded4e10f65d4dedf4ed003fed3fed1117393130012b2b5d5d
+71005d2b2b017201170604232224023534122433320417072626232206021514121633323604
+b4c23dfec3e5edfed79baf0143c2dc012c3bbf33c293a9e35c6de686a3e2020231effbc1016e
+d2e50155b1e0cb2da092a2feef91bbfee98abc00000000020044ffe80427043e000d0019016b
+b615180d0d065513b8ffe8b40d0d06550fb8ffe840730d0d065519180d0d065512070a190c47
+06480856065908670669080834103a123a16351845104b124b1645185c055c0952105d125d16
+52186d056d0964106d126d1664187701150906050d5b035405540a5b0c6c036505650a6c0c0a
+171c0407111c0b0b14241b400d0d02551b400b0b025507b8ffea40110f0f025507180d0d0255
+07100b0b025507b8fff0b40b0b065507b8fff0b40d0d065507b8fff0b40f0f065507b8fff0b4
+0c0c065507b8ffc04013242534300701000710072007030731df1b011bb8ffc040491e233430
+1b011b0e24000c0e0f025500120d0d0255000c0c0c0255001c0b0b0255000e0b0b0655000e0d
+0d0655000c1010065500160c0c065500402425341f003f000200311a3437182b10f65d2b2b2b
+2b2b2b2b2b2bed10712b5df65d5d2b2b2b2b2b2b2b2b2b2bed003fed3fed313001715d007143
+5c584009530553096205620904015d59002b2b2b2b1310373633320015140606232200131416
+33323635342623220644a489c5db01167beb8bdffeedb9b28786b2b38587b2021301278e76fe
+e1fdcdeb82011e010dcccbccd1c5cbca000000010087000003e6043e0016017d401305030613
+02a810b810e303e713f003f6130604b8fff0403c0b0d347910019810d018e018ff1804200814
+0e1416121c05070106160d0a0d0e0c0e2418401010025518400b0b02550b28101002550b140e
+0e02550bb8ffec40110d0d02550b040c0c02550b220b0b02550bb8fff4400b0b0b06550b1410
+1006550bb8fff9400b0d0d06550b0a0f0f06550bb8fff640120c0c06550b40333634ff0b01ff
+0b010b4e18b8ffc0401a343634b018f018027018a018b018c01804180302331516250100b8ff
+f6b41111025500b8fffab41010025500b8fffa40170e0e025500040c0c0255000a0b0b025500
+040b0b065500b8fffa40110f0f065500020c0c065500040d0d065500b8ffc04012333634f000
+0100002000d000e00004004e1710f65d712b2b2b2b2b2b2b2b2b2b3cfd3cf43c105d712bf65d
+712b2b2b2b2b2b2b2b2b2b2b2b2bed3c103c003f3c3f3fed1139011239313043794016061109
+0a080a070a0306102611060e1b010f0a121b01002b012b2b2a81015d71002b5d713311331536
+33321616171615112311342626232206151187a275dd60a150100ab42a6b4873a7042697af45
+704d327dfd7302866e6d4192ccfdbc0000000001003fffe803b1043e00300317407b04221422
+3a094a094424562265227c098e098424a613ab2cc2030d09171a1817304b2cd617051b025502
+021032010a185c085c095c0a5c0b5c0c5c0d6a086a096a0a6a0b6a0c6a0db426b4270f272624
+27242936245a0a590b64266428742374248024930a9c0c9228972c9530a40aa90ca327a428b3
+26c5261628b8fff4b40d0d065522b8fff4b40d0d065523b8fff4b40d0d065524b8fff4b40d0d
+065528b8fff4b40c0c065522b8fff4b40c0c065523b8fff4b40c0c065524b8fff4b40c0c0655
+1db8ffde40121e395a0827250c0a041a202615040b2e1d1ab802aa4022192c0b0b02551f193f
+194f195f19af19cf19060f191f196f19df19041f198f190219bd02550015000002aa0001ffc0
+40140b0b025510014001021001d00102000110010201b8ffc0b314163401b8ffc040100e1134
+01012e5c1d6c1d021d1c150704b8fff4b40b0b025504b8ffe6b41010065504b8ffe640130f0f
+0655041c2e0b1f1a011a24194013183432b8ffc0402f0f0f025519180f0f025519180d0d0255
+19160c0c025519201010065519200f0f065519100c0c065519160d0d065519b8025bb207242a
+b8ffc0b51c39d02a012ab8ffe6b40c0c02552ab8ffe8b40f0f02552ab8ffe8b40c0c06552ab8
+ffeab60d0d06552a1a32b8ffc04021272a346032c032023f3280320232100101012400180d0d
+025500100d0d06550020b8fff4b40d0d025520b8fff4b41010065520b8fff440190f0f065520
+240f100b0b02550f160c0c02550f200d0d02550fb8fffa40200f0f02550f0e0c0c06550f0c0d
+0d06550f22df00013f004f00020019313437182b4e10f45d714df42b2b2b2b2b2bed2b2b2b10
+2b2bed724e105d712bf62b2b2b2b712b4dedf42b2b2b2b2b2b2b2b2bed72003fed2b2b2b3fed
+7112392f2b2b5d71722be410fd5d71722be41112391112390111121739313043794040272d1e
+2305142c261110121013100306220d201b000928071b01052d071b011e14201b00210e231b00
+22230d0c08290a1b012827090a062b041b001f101d1b01002b2b103c103c2b103c103c2b012b
+2b2b2b2a2b818181002b2b2b2b2b2b2b2b2b5d71015d72715d1337161633323635342726272e
+023534363736363332161617072626232206151417161716171e02151406062322263fb20f89
+7b7c78352593c6994f41382a91537dbd5a11b00c73697c6a16162f1b84bf975669c67dcfd901
+3d1c6b7265443d2318253249814e4779281f2b487b6718525c5237231c1d130a2433417c5c5a
+9f57ac0000010024fff2022a0599001700d8b9000affc0b323263409b8ffc040412326348019
+0100010c0d0a0103001610092b0f0a06161c030b0f10220022010d12250c01ff070845094560
+077007800790070400072007a007b007c007d0070607b8ffeeb41010025507b8fff4b40f0f02
+5507b8fff2b40e0e025507b8fff8b40d0d025507b8fff8b40c0c025507b8fffab41010065507
+b8fff0400b0f0f065507060c0c065507b8ffe8b40d0d065507ba026a00180136b166182b10f6
+2b2b2b2b2b2b2b2b2b5d71f4e410ed3cfd3c10e4f43c003fed3f3cfd3c1139123911333310c9
+3130015d2b2b25170623222626351123353311371133152311141616333202101a4c3c626c2c
+8484b3b5b5132b281ea19f103e65a202638c01076cfe8d8cfd934d2c1a0000010085000002c6
+043e001100c9403b2f1301100401230434044304530466047404060911080908090d1311090d
+000308010b1c06070106000a0928900801082220130113022211250100b8ffc04010333634f0
+000100002000d000e0000400b8fff8b41010025500b8fff840110e0e025500040c0c02550006
+0b0b025500b8fffcb41010065500b8fff440160f0f065500060c0c065500080d0d0655004e12
+47c4182b10f62b2b2b2b2b2b2b2b5d712b3cfde4105df472e4003f3f3fed1139391139390111
+1239390010c9870e7dc43130005d72015d33113315363633321707262322060706151185a23e
+693f5b5e3e42423b5e141e0426a171483aa727473f6072fdd400000000010083ffe803e00426
+0018014fb9001affc0400915173402201316340fb8fff040331214342b1301240813160c0113
+160b06000a111c030b003316251817403336341a401010025517281010025517120e0e025517
+b8ffec400b0d0d025517040c0c025517b8fff4400b0b0b065517141010065517b8fff8400b0d
+0d0655170c0f0f065517b8fff6400d0c0c0655ff1701c01701174e1ab8ffc04015343634b01a
+f01a02701aa01ab01aff1a041a0c2509b8ffc04010333634f0090100092009d009e0090409b8
+fff8b41010025509b8fff840110e0e025509040c0c0255090a0b0b065509b8fff640160f0f06
+5509020c0c065509020d0d0655094e194750182b10f62b2b2b2b2b2b2b5d712bed105d712bf6
+5d712b2b2b2b2b2b2b2b2b2b2b3cfde4003fed3f3f3c39390111123931304379401a04100e0d
+0f0d0206070806080508030610040c1b000d08111b00002b012b2a2a81005d012b2b2b213506
+23222626272635113311141716163332363635113311033f7cd55ea34f100bb40b116e51518e
+3bb49cb4486d4f35730292fdb38d314751538f880239fbda000000010050ffe803ed043e001a
+015ab1020243545840340e7f0f010f0b01400050007000030004121c0b07181c040b010e1507
+080e0e0255070c0d0d0255070c0c0c025507100b0b0255072f2b2b2b2bcdd4c6003fed3fed10
+c45d3210c45d3231301b4047090c011f1c4313431753135317601360179b029b039a0da410a4
+1a0c080d190a6a0269036a05750c700d800da60cb509b60ab50c0c160c860ce302030e225f0f
+6f0f7f0f030f01b802aa4079300040005000600070009000a000e000f00009000f0f0b000004
+121c0b07181c040b1c0f010f240e080d0d06550e221b000100240b2b1f010100010101400b0b
+065501401010065501480c0c0655011a0d0d065501491c1524cf07011f073f0702070e0b0b06
+55070a1010065507120c0c065507311b34c4182b10f62b2b2b5d71ed10f62b2b2b2b5d724b53
+234b515a58b90001ffc03859ed72f42bed72003fed3fed12392f11392f105de4105de4313000
+5d71015d7159011706062322001134123633321617072626232206151416333236033cb11def
+aedafef772e989addc1faf197f5a88aaa4846a8e018517b7cf011d010aac010281afa11b6b6c
+c3d3d6c2820000020086ffe8041f05ba0010001d0180409b01050c0f240535054505053f1fb0
+1f021f1f221c331c421c701f901f063a133c163c1a4c164c1a5d085d0d580f5d165e1a6a086c
+0d680f6e166e1ac01fd90cda17da19e213ec17ec19e31de01fff1f1920052f0f2f1430053f0f
+40054c0f50056605da1df504fa100c10150e040602001b1c0607010a151c0e0b1824d00b0110
+0b400b600b800b041f400d0d02550b0c0f0f02550b180d0d02550bb8fff6b40c0c02550bb8ff
+f0b40b0b06550bb8fff4b40f0f06550bb8ffe0b40c0c06550bb8fff4402f0d0d06550b740111
+3300040c0c025500040d0d0655003303250202c001019001a001b001f001041f013f014f0103
+01b8fffeb41010025501b8fffc401d0e0e0255010c0d0d025501100c0c025501120b0b025501
+0c0b0b065501b8fff8b41010065501b8fffc40160f0f065501180c0c065501140d0d06550119
+1e4737182b4e10f42b2b2b2b2b2b2b2b2b2b5d71723c4d10fdf42b2be410fd2b2b2b2b2b2b2b
+2b5d71ed003fed3f3fed3f11391112393130005d015d7172007121231133113633321e021510
+0023222703141716333236353426232206012da7b472b162af7140fef2bdbc6b0234559176ac
+a57576ac05bafdf58f4f8fca73feeffed69d0196bf558bcdcbd0c6cd00000002004bffe8041e
+043e0015001d015340171f001c150255035d055d09550b65036b056f09650b0815b8ffe4b40d
+0d065511b8ffe440520d0d06551d1c0d0d06552712d905fa14f61a0431123a19311c41124d1a
+411c51125c19521c61126d1a611c78067815f602f618100016010f0d17175016601670160316
+1c0f9010a010021010041b1c0a0700ba02aa0001ffc0b41010025501b8ffc040101010065510
+01010195131c040b17400db8ffdcb40d0d02550db8ffeeb40d0d06550db8ffeab40c0c06550d
+b8ffc04009272a34b00d010d1a1fb8ffc0b32526341fb8ffc040411e2334301f011f16331024
+0740242a341f073f074f070307200b0b025507180c0c0255071c0d0d0255070e0b0b0655071c
+0c0c065507160d0d065507191e3437182b4e10f42b2b2b2b2b2b5d2b4dfde44e10712b2bf671
+2b2b2b2b4ded003fedfd5d2b2be43fed12392f5d3cfd713c011112393912393130015d005d2b
+2b2b017172011706062322001110003332001114072116163332360121262726232206035eba
+2ceeb9e9feef0114dcd5010e01fce80ab285638cfdda02510c3856897ca9015617a3b4011f01
+03010c0128fedefef91020afba680195864368a600000002004affe8041c043e00280037022d
+402c090d092a190d1a2a290d2a2a390d3615371b3a2a492a5d0d5d2a6a0d692a60308a0d8629
+9a169b1aa90d1528b8ffe8b40b0b065527b8ffe840190b0b0655a619aa28b619bb28c419cf28
+d215dd28084416011eb8fff440110c0c065512120c0c0655050c0c0c065535b8ffe040550c0c
+06551f171f182b2c2a343904392c4904482c5608592b6608692b760c870cc90cf90df92b1137
+340e0104102f243417322114185f296f2902291c2f0e3f0e8f0e9f0eff0e059f0eaf0eef0e03
+0e0c0f0f02550eb8ffeab4101002550eb8fff44015101006550e0c0d0d06550e060f0f06550e
+0e1c0317b802aab61895141c1c0700b8fff4401a0c0c06550045270a321c030b296110610006
+0d0d025500252124b8ffecb41010025524b8ffec400b0d0d025524040c0c025524b8ffe4b40b
+0b025524b8fff4b40b0b065524b8ffdc400b1010065524060f0f065524b8fffcb40c0c065524
+b8025b400e27400026102620263026af260539b8ffc0b40e0e025526b8ffd6b60e0e02552631
+39b8ffc0400d1e23343039c03902a039013917b8fff4404110100655172518222f24bf06cf06
+021f063f0602060e0f0f0255060c0d0d025506180c0c0255060c0b0b0255060c0b0b0655060e
+0d0d065506100c0c065506313810f62b2b2b2b2b2b2b5d71edf4ed2b105d712bf62b2b5dedf4
+2b2b2b2b2b2b2b2b3cfd2be5e5003fed3fe42b3fedfde41112392f2b2b2b2b2b5d71ed711112
+391112393901111217393130005d2b2b2b2b01715d2b2b007125060623222635343636373637
+3637363534272623220607273e02333216161716151514161723260306070e02151416333236
+373635033c64b96aafbc477348356bda67013345887f791db0186ed08988aa5010091722bc1c
+1762c46f5c326d6968a2261d835546ab854e814e140e0d1a24250a6e2d3d597118718b4b4061
+4a2e78f0fb853d3801dd281c10284d2f48605b4f3d77000000010088000003f805ba000b0261
+401b060c0d0d0655070656065a09030f0df305f60603090c1010025506b8fff4b40c0c02550a
+b8fff4b40c0c025509b8fff4b40c0c025503b8ffe840100d0d06555503770a02120620132134
+08b8fff0b312273409b8fff0b41227341205b8fff0b312213409b8fff0408412273406040405
+0406370947040525062d0a580a7703750ada03e30607a60601230626072508390638093f0d4f
+0d59045906580759097d0479059909c606d204d606e406e907f706f90815120a0a0503030402
+0606070909080a0a050908082507061407070603040425050a1405050a0a0906030408010200
+0405060708080b0b000a04b8010f400905040c0c0655052208b8010f402120073f070207100c
+0c0655071a900d010d0b2500022501019000013f004f000200b8fffe40310e0e025500100d0d
+025500100c0c0255000a0b0b025500120b0b065500120c0c065500080d0d065500190c0de121
+4766182b2b4ef42b2b2b2b2b2b2b5d713c4d10ed10ed4e1071f62b5d4dedf42bed003f3c103c
+103c3f3c3f3c11173987052e2b047d10c487052e182b0e7d10c40710083c083c0310083c083c
+b10602435458400d4b09011f0984030209180d1134002b5d7159313001435c58400a092c1d39
+09081d1d3c06b8ffdeb21d3906b8ffd4b2203906b8ffd4b121392b2b2b2b2b595d00715d0171
+002b2b435c58b90006ffc0b2213903b8ffc0b2163903b8ffdeb2103906b8ffdeb2103903b8ff
+deb20c3903b8ffdeb10b392b2b2b2b2b2b59012b2b2b435c584012dd04010814163909081414
+3c090814143c06b8fff6b2183906b8ffecb11b392b2b2b2b2b015d59005d2b2b2b2b2b015d71
+2b33113311013301012301071188b401aae9fe6a01bfdefea17f05bafcbc01b0fe76fd64021f
+7afe5b0000020046ffe803df05ba0011001d015540a40a02040d250d340d440d053514351c57
+02540a5214531c6702640565096314601cc01fd405d513dd19e513e514ef17eb19e51de01fff
+1f161f1f2b1a3c163c1a4b1a701f901f072e02240d2e163a02350d4b02450d4614491c570a56
+0d670de506e716fa01f40e100115030e0b100f001b1c0b0711000a151c030b1833010025110f
+251010d011011011401160118011041f400b0b02551f400d0d025511121010025511b8fff440
+110f0f025511060e0e025511180d0d025511b8fff2400b0b0b0655110e1010065511b8ffeeb4
+0c0c065511b8fff840420d0d065511741224bf07cf07df07ff07041f073f074f0703071e0b0b
+025507180c0c0255071e0d0d0255070c0b0b0655070c0d0d0655071a0c0c065507191e345018
+2b4e10f42b2b2b2b2b2b5d714dedfd2b2b2b2b2b2b2b2b2b2b5d713c10ed10fd3ce4003fed3f
+3c3fed3f3c11391112393130005d01715d007121350623222626353412363332161711331101
+1416333236353426232206033865c47fd5756ad48360962fb3fd20ac7576a5a87b78a1869e8c
+fba39f01038a5141020efa460212cccac1c6daccc40000020042fe5103ea043e001e002a016f
+40600b0b05142c0b25144c0b451406091d191d2c0b26142c23390b36144a0b46145607580b68
+0bfa0af5150e2e232c273e233e274c27902ca02c07362136293f2c460b462145295421542969
+0763216329602c802cda27e821ee23ef271117160615b802b1b4281c130701b802aa40102000
+3000600070008000c000d0000700b8027d4032051c1c0f0a45221c0c0a16153325330a251818
+d017011017401760178017042c400b0c02552c400d0d025517121010025517b8fff440110f0f
+025517060e0e025517160d0d025517b8ffea400b0b0b065517121010065517b8ffeeb40c0c06
+5517b8fffc404a0d0d065517740f012500221f24bf0fcf0fdf0fff0f041f0f3f0f4f0f030f20
+0b0b02550f1a0c0c02550f220d0d02550f1c0b0b06550f0c0d0d06550f1a0c0c06550f192b2c
+74213450182b2b4ef42b2b2b2b2b2b5d714dedf4ed10fd2b2b2b2b2b2b2b2b2b2b5d713c10fd
+e4f63c003fede43fedfd5de43fede43f3c3130015d71005d7117171617163332363736270623
+22023534123633321735331114060623222613141633323635342623220666af0b3243747d88
+180e0176b0dbf06ed18dbc7aa665dba0beea99a67d7ca8ad7a78a8581a512532645a37b08b01
+3cdd9801018c9880fc6af8cf78ab032ad1c0bfccc3c6c30000020087fe690421043e0012001e
+0162408e0c102d103d104b10043f20b020021f20290c231d3215321d421d70209020083a173a
+1b4a174a1b59085b0c5c175c1b6a086b0c69106d176b1bc020d314dd18dd1ad31ee414e41ee0
+20ff201623042b102b1535043a1046044a105a10e50beb1dfe100b110e03161c1c0607010616
+1c0e0b000e1924d00a01100a400a600a800a0420400b0b025520400d0d02550ab8ffe6400b0f
+0f02550a180d0d02550ab8fffab40c0c02550ab8ffeeb40b0b06550ab8fff4b40f0f06550ab8
+ffe840230c0c06550a74011333023312250000c001019001a001b001f001041f013f014f0103
+01b8fffc401d0e0e025501100d0d025501100c0c025501100b0b0255010c0b0b065501b8fff6
+b41010065501b8fffc40160f0f0655010c0c0c065501120d0d065501191f473718012b4e10f4
+2b2b2b2b2b2b2b2b2b5d71723c4d10fdf4e410fd2b2b2b2b2b2b2b2b5d71ed003f3fed3f3fed
+113912393130005d015d71720071131133153636333216161514020623222627110314163332
+3635342623220687a43a926888d06a75df7b5a8f2e11a67678aba77473b1fe6905bd8a51518c
+ff98a3fefb8b4c3afdfb03a4cdc4cbd5cbcad70000010087000003e805ba00140161b90016ff
+c0b315173403b8ffe0400e0d0d0655250435034503ba0d0403b8ffe0403a1719341708110c11
+14030501000f1c0507140b0a0c250940333634ff0901c0090116400b0b025516401010025509
+281010025509140e0e025509b8ffec40110d0d025509040c0c0255091a0b0b025509b8fff640
+0b0b0b065509141010065509b8fff8400b0d0d0655090a0f0f065509b8fff6b60c0c0655094e
+16b8ffc04017343634b016f016027016a016b016ff1604160214250100b8ffc04010333634f0
+000100002000d000e0000400b8fffab41010025500b8fffa40170e0e025500040c0c02550008
+0b0b025500040b0b065500b8fffa40160f0f065500020c0c065500020d0d0655004e15475018
+2b10f62b2b2b2b2b2b2b2b5d712b3cfd3c105d712bf42b2b2b2b2b2b2b2b2b2b2b2b5d712bed
+003f3c3fed3f1139113901123931304379400e060e07250e060c1b010d080f1b01002b012b2b
+81002b5d2b012b33113311363332161615112311342623220606151187b47ec076ae4bb4756b
+508d3c05bafdf2925da49cfd5f02a1877b538e7dfdbb000100130000028005d30017010d401e
+1409010f192f193019401970199b0c9c0da90d081a0d280db019c0190419b8ffc040281a1f34
+1d080d030c0f1c0a0115022b1413040306000a9f140114ff13400417250400030292010100b8
+ffc0b331383400b8ffc0402b1c1f3490000119400f0f025519400d0e02550014101002550028
+0f0f025500220e0e0255002c0d0d025500b8fff2400b0c0c025500140b0b065500b8ffeab410
+10065500b8ffe6b40f0f065500b8fffab70c0c065500a31819bc01ba002100f6010a00182b2b
+f62b2b2b2b2b2b2b2b2b2b2b5d2b2b3c10f43c103ced10eded5d003f3f3c3c3cfd3c3fed3911
+39313043794014101106090706080602061009121b0011060f1b012b012b2a8181012b715d00
+72331123353335343736363332170726232206151533152311b29f9f131a83764c5c1b383252
+44cfcf039a8c716b344657129d0a4660628cfc660001008700000626043e002301c7b9000dff
+f4b40d0d065508b8fff4b40d0d065509b8ffd8404d0b0d342504e404e409e117e52005d505f6
+20021708202309181b20090303231e1c06151c0b0b06070106231a19100ad025019025a02502
+2517171a0e2590110111041010025511180f0f025511b8ffec400b0e0e025511140c0c025511
+b8ffe840170b0b025511020b0b0655110c1010065511060f0f065511b8fffab40c0c065511b8
+fff8b40d0d065511b8015d400c1825901b011b180f0f02551bb8ffec400b0e0e02551b140c0c
+02551bb8ffee40110b0b02551b040b0b06551b0a101006551bb8fffe400b0d0d06551b0c0f0f
+06551bb8fffcb40c0c06551bb8015d4016000233232501d000019000a000021f003f004f0003
+00b8fffe401d0e0e025500100d0d025500100c0c0255000c0b0b025500160b0b065500b8fffc
+b41010065500b8fff440140f0f0655000a0c0c0655000e0d0d065500192425b80178b3214750
+182b2b4ef42b2b2b2b2b2b2b2b2b5d71723c4dfde410f42b2b2b2b2b2b2b2b2b5dedf42b2b2b
+2b2b2b2b2b2b2b5dfd4e456544e67172003f3c3c3c3f3f3c4d10ed10ed111739011112391239
+31304379400e0c141326140c111b01120d151b01002b012b2b81015d005d2b2b2b3311331536
+3633321617363332161511231134262623220615112311342623220606151187a132a66a7697
+1f7eca9eaab3235c3e7094b458644c813a0426954e5f6258baafb6fd27029d6c5f3a95a4fd97
+02b27878509a91fdd9000001005cffe704eb05d3003002154027630363047303740404252735
+03391c430349074c1d451f44244627530359075c1d572889130e23b8fff2b41010025524b8ff
+f2b41010025525b8fff2b41010025526b8fff2b41010025527b8fff2b41010025523b8fff6b4
+0d10025524b8fff6b40d10025525b8fff6b40d10025526b8fff6b40d10025527b8fff640460d
+100255280d26240224032725360f34234425452f5a20562355256c0b6a0d6b0e66146518790b
+7a0d7a0f7d107524732586038a0b890d8a0f8d1085248325920d960f96151eb1060243545840
+2d2126121b261a09262901260000291a120432312600650002000d2d791b891b021b25160d2d
+1e27250125051605b8fff4400c0c0c0655051e2d091e1e1603003fed3fed2b1112395d111239
+1112395d1112395d01111217392fed2fed2fed2fed1b402d25240e0d0b05211c1d1e1b080706
+04030206012524220e0d0b06051e1b2d1a400c0c02558f1a011aed16002d01b8ffc040120c0c
+02551001200150016001700190010601b801b040132d1e1e1603051e2d091b261a4a09260029
+0129b8ffeab40e0e025529b8fff4400d0c0c0255291a32212612012612b8ffecb40e0e025512
+b8fff6b40d0d025512b8fff8400f0c0c02551254200001001931635b182b4e10f45d4de42b2b
+2bed10ed4e10f62b2b5d4dedf4ed003fed3fed10fd5d2be410fd5d2bf4111217391117391112
+393901121739593130005d712b2b2b2b2b2b2b2b2b2b015d7113371e02333236363534262726
+24272626353436363332161617072626232206151417160417161615140606232224265cb70d
+5fc87d6faa53505c3bfe6c5169677ef294a3f98605ba0fada9b0a1393801d958807a86fb9dc7
+fef39901d7106e8d5742734445672317612b37a3656fc16469cc810e8b8e815b4f33336b283b
+b57675cf7374e9000000000100830000013705ba000300e3b605360b0b025505b8ffc0b33738
+3405b8ffc0b334353405b8ffc0b330313405b8ffc0b322253405b8ffc040251517340f051f05
+9f05df05044f05df05f005031f0570058005ff05040100000a0203250100b8ffc0b337383400
+b8ffc040153335349f0001c000f0000200002000d000e0000400b8fff8b41010025500b8fffa
+401d0e0e025500040c0c0255000a0b0b025500140b0b065500081010065500b8fffeb40d0d06
+5500b8ffffb40c0c065500b8fffc400a0c0c0655004e044750182b10f62b2b2b2b2b2b2b2b2b
+5d71722b2b3cfd3c003f3f3130015d71722b2b2b2b2b2b3311331183b405bafa46000002fffd
+0000055905ba0007000e0167b6010e0f10025502b8fff2b40f10025502b8fff8b40d0d065502
+b8fff440590c0c0655090c0c0c0655050c0c0c06552f10301067086809601088039010c905c6
+06c010f0100b0805590156025010680bb010f30cf30df30e09040c040d040e030b0a09050404
+0c0d0e080607070c09050408060c07010000b8fff8400f0c0c02550020070c1407070c020303
+b8fff840150c0c02550320040c1404040c091e0505081e060306b80270400900080ce9400201
+0202ba010b0001010b40120c2000650703525004cf04df040390040104b80101400b500cc007
+df0c03900c010cb8010140100f07cf07027f0780070207930fd6d7182b10f45d7119f45d71f4
+5d7118ed10ed1a1910eded00183f3c1aed3fe43c10ed3c10ed87052e2b2b7d10c4872e182b2b
+7d10c401111239391139398710c4c40ec4c4870510c4c40ec4c43130014bb00b534bb01e515a
+58b4040f030807bafff00000fff838383838590172715d2b2b2b2b2b2b230133012303210313
+210326270607030233d10258ddabfd9ba1d901f19946221c3305bafa4601bcfe44025a0196b9
+778d8b000000000200880000013c05ba0003000700cd405e09360b0b02554f099009a009b009
+c009df09f0090700091f09700980099f09b009c009df09e009ff090a1f090100010704020309
+06037e0100060506040a0607250500049f04a004b004c004e00406c004f0040200042004d004
+e0040404b8fff8b41010025504b8fffa40170e0e025504040c0c0255040a0b0b025504140b0b
+065504b8ffeab41010065504b8fffeb40d0d065504b8fffc400a0c0c0655044e084750182b10
+f62b2b2b2b2b2b2b2b5d71723cfd3c003f3f3c3fed0111123939111239393130015d72712b13
+3533150311331188b4b4b404ebcfcffb150426fbda000001007cfe51026005d30010003d400a
+270f0100101207081010b80133b3009f0e08b801334011079f0e5e0003100320030303ac119d
+8c182b10f65dfdf6ed10f6ed003f3c3f3c3130015d0126021134373637330607060706151001
+01df95ce4d5abc8179273d232b012bfe51bc01f8010eeedafdfbd0598a96bbbdfe1ffe200003
+0096000004e905ba0011001d002a0113b90004fff440470b0b06550404462356236623730984
+0906691a75057009730b8305830b062716090318272a1e161d090913121e2a2a2929001c1d1e
+0201021f1e1e1100081826060c1010025506b8ffe640330f0f025506120d0d025506060c0c02
+5506080b0b0655060c0c0c065506140d0d0655065425260c1c101002550c0a0d0d02550cb8ff
+f440150b0b06550c1a2c1d1e200120000100201010025500b8fff6b40f0f025500b8fff6b40d
+0d025500b8fffab40c0c025500b8fffab40c0c065500b8fff0400a0d0d0655005d2b3b5c182b
+10f62b2b2b2b2b2b5d3cfd3c4e10f62b2b2b4dedf42b2b2b2b2b2b2bed003f3cfd3c3f3cfd3c
+12392f3c10fd3c392f1139111239011217393130015d005d2b33112132161615140607161615
+140e0223012132373636353426262321112132373e02353426262321960226a8cb736667858f
+5780c18cfe93013d81384a4b46829efedb016d5e26435a3a54958cfead05ba59b9655ea63327
+bc8067b1603103521116664d496f29fba0070c386b4652793100000100980000060f05ba0010
+02e4b10202435458b90008fff6400b0c0c0255080e0d11025502b8ffeeb40d11025505b8ffee
+40280d1102550c120c0c0255050f0c030900010208090b0e000809020a0b06101002550b100d
+0d02550bb8fffab60c0c02550b1000b8ffe6b41010025500b8fff8b40f0f025500b8fffcb40d
+0d0255002f2b2b2bcd2f2b2b2bcd003f3fc0c010d0d0c0111217392b2b3130012b2b2b001bb1
+0602435458401f07200b0b065506200b0b065503200b0b065504200b0b065505200b0b065508
+b8fff240230b0b0655020c0b0b065503060c0c0655020e0c0c0655090c0c0c06550a0c0c0c06
+5507b8fff8b40d0d065508b8fff8401f0d0d06552605010c200a12340f200a12340f050c0300
+010e0b00080801020ab8ffeeb40b0b06550ab8ffeeb40c0c06550abb0256001200100256400d
+000c0b0b065500060c0c065500b8fff8b40d0d065500012f2b2b2bf42ff42b2b003f3c3f3c3c
+111217392b2b5d3130012b2b2b2b2b2b2b2b002b2b2b2b2b1b407f00020f0814021b0804760c
+860cc80c03090c490c490f032904250d2c0e58035b04760d780e870d080b020508390d360e4f
+024b03440740084d0d420e0a9802990396079608a803a7070612020f0e0e3005021405050208
+0c0d0d300508140505080c520f5201400102020808090a0b0b0d0d0e0e100008090260128012
+0212ba02a8000d0131b2052008b80131400a0c090a20400c7f0b010bba0256000e010bb20520
+02b8010b40090f0100200f70100110b80256b72005600580050305b802a8b3113b59182b1910
+f45df45d3c18fd3c10ed1a1910edf45d3c1a18fd3c10ed1a1910ede45d00183f3f3c3c103c10
+3c103c103c103c103c1a10eded87052e2b877dc4872e182b877dc43130004bb00b534bb01e51
+5a58bd000cfffb0008ffd60002ffd638383859014bb00c534bb028515a58b9000dfff8b10e0a
+38385901435c58b9000dffd4b621390e2c21390db8ffd4b637390e3237390db8ffd4b52d390e
+2c2d392b2b2b2b2b2b5972715d00715d015d5959331121011617363701211123110123011198
+0124015b30161935015f0105bbfe56affe5805bafbf29148509b03fcfa4604cbfb3504e0fb20
+00000001007cfe51026005d300100065400c2802281002090a1001001209b80133b30a9f0301
+b80133b4009f035e0eb8fff0b4101002550eb8fff8b40f0f02550eb8ffe4b40d0d02550eb8ff
+ec400f0a0a02550f0e1f0e020eac129d8c182b10f65d2b2b2b2bfdf6ed10f6ed003f3c3f3c31
+30015d1323001134272627262733161716151002fd81012b2b223d277a81bc5a4dcffe5101e0
+01e1bcb9968a5ad2fbfddaeefef2fe0800000002009e000004fd05ba000d001800b2402c6511
+6b14024b104b145b105b14040b0c1e0f0e0e0017181e02010200081226080a0d0d025508100b
+0b065508b8fff4401b0c0c0655081a201a01201a011a180d200120000100201010025500b8ff
+f6b40f0f025500b8fff6b40d0d025500b8fffa400b0c0c0255000c0b0b065500b8fffab40c0c
+065500b8fff0400a0d0d0655005d193b5c182b10f62b2b2b2b2b2b2b5d3cfd3c4e10715df62b
+2b2b4ded003f3f3cfd3c12392f3cfd3c3130015d005d33112132171e02151402212111112132
+36353426272623219e0229924d6c9259eefec9fe88017bbc9e5d4c3184fe8905ba0e1265b66d
+bbfefdfdac03018c7f5c83150d00000000010028000003d40426000e01af400d12b802c90802
+12013212173408b8ffce4009121734013e1e213408b8ffc2404a1e2134290228092f10390139
+0a49014602460849094f105c01540254085a0950106c01630263086a097b0174087b098b0185
+088909f901f4021b1908260129082b093908a508d7010710b8ffc0b7101534022c123909b8ff
+d44023123901023a090a02080a0a25010214010102010d0e08060261052b0706060a610d000d
+b8fff440090b0b06550d2b0e0a02b8010fb40808070506bb025b00000007fff440160b0b0655
+07220da00e01000e400e600e800ef00e050eb8fff440240b0b06550e74000a7e0101af00014f
+006f00ff000300180b0b065500190f1074217cc4182b2b4ef42b5d713c4d10ed10fd2b5d713c
+e42b10f43c103c10fd003fed2b3c10e53f3cfde5113911123987052e2b877dc4100ec42b3130
+012b2b2b715d002b2b2b2b435c58b5290126080201b8ffce4009121734083212173401b8ffc2
+b71e2134083e1e2134002b2b2b2b017159015d435c58b90008ffdeb20f3909b8ffdeb20f3909
+b8ffe8b71b390908161b3d09b8fff0b2173909b8fff8400a163902141639021a16392b2b2b2b
+2b2b2b2b593335010623213521150107363321152802a47358fe4f0364fdc16f796a01eb9203
+08069277fd5e7b099b000001006dffe705b905d300250113401a1b141b15026027015e081301
+12032424002112170225001e0201b8ffc040200c0c0655010106171e0e03211e060901012627
+25242003032002202760020302b8ffe4b40f0f025502b8fff2b40d0d025502b8ffdab40c0c02
+5502b8fff4401b0c0c06550272802701271d26200a010a100c0c06550a1926635b182b4e10f4
+2b5d4ded4d105df62b2b2b2b5d3c4d10fd3c1112392f003fed3fed12392f2b3cfd3c11123911
+123901111239123931304379404404231b1c1a1c191c03060c26102515261f26082504262325
+180d1d2100160f1321011112141320071d210022052521011c0b17210114111721011e092121
+002403212100002b2b2b2b012b2b103c103c2b2b2b2b2b2b2b2b2b2a81015d005d0135251106
+0423222402353412243332041617072e02232206060706151412043332363711034c026d8ffe
+d0a0d8fe9fb4b30150db9f01019226af2162b66f85c2772138870102917ef03e023fac01fde0
+7273b9015ed8d60173b467b8943070804d51844f889fc4fef880613701110002009e0000055a
+05ba000f001d00e5402f201f0143081c1d1e02010211101e0f000817262009011f400d0d0255
+092010100255090a0f0f025509180d0d025509b8fff440150c0c0655091a1f1d102001200001
+00201010025500b8fff6b40f0f025500b8fff6b40d0d025500b8fffab40c0c025500b8fff7b4
+0c0c065500b8fff8400a0d0d0655005d1e3b5c182b10f62b2b2b2b2b2b5d3cfd3c10f62b2b2b
+2b2b5ded003f3cfd3c3f3cfd3c313043794036031b0708060805080408040619181a1802060b
+0a0c0a0d0a030615161416131603061b03172101120e17210118081c2101160a1121002b2b01
+2b2b2a2a2a2a81015d3311213217161716121514020e02232521323637363635342627262321
+9e01f9ab5a7e5974734e7a91cd85feb1013991a531454d976c4eadfecc05ba151d4c62fecfc4
+a7fefea96132ad363145e9a6e6f72a1e0001001a000003e80426000a01eab102024354584017
+0508000a080601060a0005090805010205240f0f0255052f2bddcd10ddcd113333003f3f3f12
+3931301bb7350501002211390ab8ffde400d11390916121c340816121c3402b8ffeab3121c34
+01b8ffeab3121c340ab8ffd840091e213400281e21340ab8ffe8400922253400162225340ab8
+ffda407e282e340020282e340f0c29002809260a3900350a4800470a56015602590858096601
+66026908690978007701770279087809770a870187028603890788088a099d009809910aac00
+a20abd00b707b10ac900c50ada00d50aec00e30afb00f40a2c0a00050a1800160a2800260a37
+0a4f00400a09054012163405400b0d34b10602435458400905010008060106000ab8fff4400f
+0d0d06550a000c0d0d065500050908b8fff440120d0d0655080501020c0d0d06550205050c0b
+1112392fdd2bcd10dd2bcd10cd2bcd2b002f3f3f11123931301b40370a07080825090a140909
+0a0003020225010014010100050a0a000a09080802020106070a09030001052f0c010c220840
+40400980090209b8011bb5400580050205b8011b400920024001220bead2182b10f6ed1a19fd
+5dfd5d1a18ede45d11123939123939003f3c103c103c3f3c113987052e2b877dc4872e182b87
+7dc4593130002b2b01715d2b2b2b2b2b2b2b2b2b2b2b2b005d59210133131617363713330101
+aefe6cbee4251f182becb9fe6e0426fd84676f54760288fbda000000000100090000054605ba
+000a013eb1020243545840120501000802010200080a00050908050102052fddcd10ddcd1133
+33003f3f3f11123931301b40242f05012a002803250a2f0c300c600c89088909900cc00cf00c
+0b200c500c0204020b0802b10602435458b709010c0b00080102003f3f01111239391b40240a
+0909200805140808050001012002051402020509010205e9200a000809650801650208b8ffc0
+400b2839500801800890080208b80101400d024028395f02018f029f020202b8010140112005
+500502300560059005c005f0050505b80288b30b60a8182b1910f45d71e45d712be45d712b18
+10ed10ed003f3c1a19ed183f3c87052e2b7d10c4872e182b7d10c4014bb00b534bb014515a58
+b2000f0ab8fff1b2091201b8fff1b2081402b8ffee38383838383859014bb028534bb036515a
+58b90000ffc03859593130015d715d005d5921013301161736370133010241fdc8d2017d2e1f
+222d018cc6fdc205bafbd7807078780429fa4600000100bf0000018105ba000300ccb5010200
+080205b8ffc0b3383d3405b8ffc0b333343405b8ffc0b32d303405b8ffc0b328293405b8ffc0
+b323253405b8ffc0b31d1e3405b8ffc0b3181a3405b8ffc0402a0d103420059005af05030320
+0100008f00a000b000042f0040005000df00f000051220008f0090000305b8ffc0400b0d0d02
+5500181010025500b8ffecb40f0f025500b8ffeeb40d0d025500b8fff640100c0c025500200b
+0b065500a204d659182b10f62b2b2b2b2b2b5d435c58b2800001015d5971723cfd5d2b2b2b2b
+2b2b2b2b3c003f3f313033113311bfc205bafa46000100a80000048505ba0009008d402b0605
+1e08088f070107070003041e0201020008079c2002200b02021a0b0409200120000100201010
+025500b8fff6b40f0f025500b8fff6b40d0d025500b8fffa400b0c0c0255000c0b0b065500b8
+fffeb40c0c065500b8fff0400a0d0d0655005d0a3b5c182b4e10f42b2b2b2b2b2b2b5d3c4dfd
+3c4e10f65d4de4003f3f3cfd3c12392f5d3c10fd3c313033112115211121152111a803ddfce5
+02b0fd5005baadfe3aadfd660000000100a2000004e805ba000b0095401506051e0808070700
+03041e0201020a091e0b000807b8ffc0401d1012340754034a200a200d020a1a0d0409200120
+000100201010025500b8fff6b40f0f025500b8fff6b40d0d025500b8fffab40c0c025500b8ff
+fab40c0c065500b8fff0400a0d0d0655005d0c3b5b182b4e10f42b2b2b2b2b2b5d3c4dfd3c4e
+10f65d4df4e42b003f3cfd3c3f3cfd3c12392f3c10fd3c3130331121152111211521112115a2
+0424fc9e032bfcd5038405baadfe3facfe0dad000001006c03e9013d05c9000b00744026d307
+e30702b107c30702f208019308a1080273088208025508650802020801080b030008ab07b801
+50401e0103f90202010b003c010002033c0007380827000020010101190c9d79182b4e10f45d
+3c4d10f4e410fd3c003ffd3c103c10ed10fded0111123900c931300072717171710171711335
+331514070607273636377cc11f2b5b2c36350304f8d1a5863b512947165f5300000100300000
+04ba05ba00070089400d05021e04030200080706050409b80273b320040104b80101b7062001
+022f030103b80101b5010120000100b8ffe8400b1010025500080f0f025500b8fff2b40c0c02
+5500b8ffe2b40d0d025500b8fffcb40c0c065500b8fffeb40d0d065500b80273b308b699182b
+10f62b2b2b2b2b2b5d3c10f45d3c10fde45de6103c103c003f3f3cfd3c313021112135211521
+110213fe1d048afe1b050dadadfaf300000000010021fe5103ee0426001a01f7b10202435458
+401d0a140f030b031c190f12060b061340120f200b400c200f180f0f02550f192f2b1add1a18
+cd1a1910dd1a18cd003f3f3fed12173931301bb30f1c010fb8ffde406d1c392814560faf0a03
+400d400f020f202830341020283034070c0912160d1812270b270c270d360c360d350e99110b
+2812281348165912591359156912691369157906760d79117a147a15850d8a118c128c138914
+980aa80bbc10bb11ba14ea0ae714f50dfd10f914ff1c1e12b106024354584016130b1c1b040f
+440f840f030f190b031c190f12060b06003f3f3fed1112395d01111239391b40370f0f0c1011
+12120a00031914131325120a1412120a0f0c0f110c250b0a140b0b0a1312120c0c0b06031c19
+0f001c101c022f1cbf1c021cb8023fb50f1340124014b80254400b3f124012025f12bf120212
+b80142b60f012200451b0ab8025440120f200b4040200c300c4f0c03500cff0c020cb80142b3
+2f0f010fb8023fb41b207c66182b1a1910fd71f45d711a18ed1a1910ed1810f4e41910e45d71
+ed1a1810ed1910e45d7100183fed3f3c103c103c87052e2b087d10c487052e182b0e7d10c400
+111239870e103c3c08c44bb00e534bb018515a58bb000cffe8000bffe8013838595931300143
+5c58b90014ffdeb637390a2237390eb8ffe8b51539112215392b2b2b2b595d712b2b00715d2b
+015d591327163332363736373637013313161736371333010607060623227f143b2c3c481711
+26050bfe6dc2dd2b221f2be3b4fe6c4124307c5634fe67a91028241b6b0f1d0428fd9975817c
+76026bfbc8af425953000002005a0000040c05d3001e00220084402f8c1a8b1b027c1a7c1b02
+621a651b026b0c610e025a0c540e02360e440e021b190807040010271111000d2914011e00b8
+02af40232122213c1f0a1f3c2222203c21211e005e1e6e0a5e176a24105e201101116a23575a
+182b10f65ded10f6edf4ed103c10ed3c10fd003fed3c10f63c3fed12392fe41117393130015d
+5d5d5d005d5d012635343736373e0235342623220607273636333204151406070e0207033533
+1501d8011e163124bb38a477739a18b919f7cbd701005a8358361a02b8cd016924126a4d3a3b
+2ba5623a699f909916cddaeaa660a2744e4a606cfe97cdcd00000001004101b8026a026d0003
+002c401970027003024d014d0202012300021a05700001001904708d182b4e10e45d10e6002f
+4ded31300071015d1335211541022901b8b5b500000005ba001905ba001a05a7001904260018
+0000ffe70000ffe80000ffe7fe69ffe805ba0019fe69ffe802ea000000b8000000b800000000
+00a800ad016900ad00bf00c201f0001800af00b900b400c800170044009c007c009400870006
+005a00c80089005200520005004400940119ffb4002f00a1000300a100cd00170057007e00ba
+00160118ffe9007f008503d300870085000d002200410050006f008d014cff75005c00df0483
+0037004c006e00700180ff58ff8eff92ffa400a500b903c8fffd000b001a0063006300cdffee
+05d8ffdc002d005c0095009900df019209b500400057008000b9039d0072009a035d0401ff67
+fffa00030021007700cd0004004d00cd01c0022b004c006500e70118017c034305d8ffa3ffb0
+ffc40003001c005d0068009a00ba013501470221055cff4dffcd0016002d00780080009900b2
+00b600b600b800bd00da010c05f0ffa4fff00019002c0049007f00b400ce01c003fefd81fe3f
+000000050018002900390049006f00be00c700d0012301c1026f050c05320540057affd40014
+00310055005700a700b400e601f7027e027e027f03c60446ff42000e0085009100bf00c200c5
+00e1011a012f014f01560229026f029e03720008002c00310031006400690089009800c700de
+012b01b6020c02cf03a304ab04fb061dfee0ff0e00060026009b009d00c1010d011801200173
+018201d601e30243025f029b02e2039404a904d20761001c005e006d008d00ab00f701120138
+0151015b0168017c01870191019901cd01d001e802410254026b02ef0368037103bd04420442
+0453047304830586058b06e8fe58fec4fed1fef7ff32ff860051007c008100910095009e00b4
+00b900cf00d900d900df00e20105010b010e010e012001210155017b017b017e018d01a201a8
+01a901b401d001d001e201e901f201f501fb020002000206021b022102220222022302720277
+0294029c02cf02cf02d002ec02f903170322032b0335033c0359036f037103870390039003b5
+03e1041a04cf04ff053205320596059f05a805ab05c205f0060c0782080008ccfca3fd2afdde
+fe00fe88fe96feb2feb4ffe100150019001a001c001f003c005100610061006a0078009600a5
+00af00d3010c0118011a012a013e014c0151015f016a0171017801820184019a01a501a801a9
+01ae01bc01cd01d701ef0200020d021c02210222022e02350242024f024f025e026502710290
+029202b402d602fa0307030b030f0315032a0347035d036503740379039603b003cc03dd03e2
+03f603fc03fc03ff040a041f04220426042b0447045f0475049e04e704e7055c05cb05e5060a
+066d068606b806f10736073e07500751075d078f07b607d4086000b600c300b500b700000000
+000000000000000001e00381034503b5008e0233041902ce02ce002d005f0064034d023f0000
+02a80188027d01b402240578063b023b014e00f00426029402c6029f02f6023b034d014b0153
+006a0231000000000000061404aa0000003c04c300ed04bc026502ce03b50078060c017e02ef
+060c00b201000239000001c50330042b03cb00da03df010704a100db040a011701ed02a70350
+010b01bd043e05580021039c00ae0371017d00b5024500000afb088c012b014e01aa00870054
+013201f803ff0003024e00b4003703e30083006b02d800ed00770088009701640467008e0033
+017c00e700a6029e0329056e062a061501c90269048a021301b4000204a90000023901240103
+05140084015d039a06ef02d9007500cf040a00de03ac04bc02cf02ae034d04f005520168006d
+007d00860071ff810079055804d2016700030156002504e00094007c033204210094007f0072
+005c002f00b6001800ba00b80041034d00720018001f004c016a01550099009a009a009800b2
+00040078006900140057006e00ce00b4065402b80067050e016500e7000004cbfe52005affa6
+0099ff67006eff92002dffd40087ff7c00b800a800e5008f00a80185fe7b0070001e00d900de
+014c054602cf0546ff2d028a02d90253029600b7000000000000000000000000000001250118
+00ea00ea00ae0000003e05bb008a04d70053003fff8cffd500150028002200990062004a00e4
+006d00ee00e5004803c00033fe4e02b1ff460370007905df0051ffa7ff1f010a0068ff6c004f
+00bc00a507050061072b00ed04b001d200b6007b00650252ff740365fe690094008f005c0040
+00860075008900894043555441403f3e3d3c3b3a3938373534333231302f2e2d2c2b2a292827
+262524232221201f1e1d1c1b1a191817161514131211100f0e0d0c0b0a090807060504030201
+002c4523466020b02660b004262348482d2c452346236120b02661b004262348482d2c452346
+60b0206120b04660b004262348482d2c4523462361b0206020b02661b02061b004262348482d
+2c45234660b0406120b06660b004262348482d2c4523462361b0406020b02661b04061b00426
+2348482d2c0110203c003c2d2c20452320b0cd442320b8015a51582320b08d44235920b0ed51
+582320b04d44235920b09051582320b00d44235921212d2c20204518684420b001602045b046
+76688a4560442d2c01b10b0a432343650a2d2c00b10a0b4323430b2d2c00b0172370b101173e
+01b0172370b10217453ab10200080d2d2c45b01a234445b01923442d2c2045b00325456164b0
+50515845441b2121592d2cb00143632362b0002342b00f2b2d2c2045b0004360442d2c01b006
+43b00743650a2d2c2069b04061b0008b20b12cc08a8cb8100062602b0c642364615c58b00361
+592d2c45b0112bb0172344b0177ae4182d2c45b0112bb01723442d2cb01243588745b0112bb0
+172344b0177ae41b038a45186920b01723448a8a8720b0a05158b0112bb0172344b0177ae41b
+21b0177ae45959182d2c2d2cb0022546608a46b040618c482d2c4b53205c58b002855958b001
+85592d2c20b0032545b019234445b01a23444565234520b00325606a20b009234223688a6a60
+6120b01a8ab000527921b21a1a40b9ffe0001a45208a54582321b03f1b235961441cb114008a
+5279b31940201945208a54582321b03f1b235961442d2cb110114323430b2d2cb10e0f432343
+0b2d2cb10c0d4323430b2d2cb10c0d432343650b2d2cb10e0f432343650b2d2cb11011432343
+650b2d2c4b525845441b2121592d2c0120b003252349b04060b0206320b000525823b0022538
+23b002256538008a63381b212121212159012d2c4bb06451584569b00943608a103a1b212121
+592d2c01b005251023208af500b0016023edec2d2c01b005251023208af500b0016123edec2d
+2c01b0062510f500edec2d2c20b001600110203c003c2d2c20b001610110203c003c2d2cb02b
+2bb02a2a2d2c00b00743b006430b2d2c3eb02a2a2d2c352d2c76b8022323701020b802234520
+b0005058b00161593a2f182d2c21210c6423648bb84000622d2c21b08051580c6423648bb820
+00621bb200402f2b59b002602d2c21b0c051580c6423648bb81555621bb200802f2b59b00260
+2d2c0c6423648bb84000626023212d2cb4000100000015b00826b00826b00826b008260f1016
+1345683ab001162d2cb4000100000015b00826b00826b00826b008260f1016134568653ab001
+162d2c4b53234b515a5820458a60441b2121592d2c4b545820458a60441b2121592d2c4b5323
+4b515a58381b2121592d2c4b5458381b2121592d2cb0134358031b02592d2cb0134358021b03
+592d2c4b54b012435c5a58381b2121592d2cb012435c580cb00425b00425060c6423646164b8
+07085158b00425b00425012046b01060482046b0106048590a21211b2121592d2cb012435c58
+0cb00425b00425060c6423646164b807085158b00425b00425012046b8fff060482046b8fff0
+6048590a21211b2121592d2c4b53234b515a58b03a2b1b2121592d2c4b53234b515a58b03b2b
+1b2121592d2c4b53234b515ab012435c5a58381b2121592d2c0c8a034b54b00426024b545a8a
+8a0ab012435c5a58381b2121592d2c4b5258b00425b0042549b00425b00425496120b0005458
+212043b0005558b00325b00325b8ffc038b8ffc038591bb04054582043b0005458b00225b8ff
+c038591b2043b0005458b00325b00325b8ffc038b8ffc0381bb00325b8ffc038595959592121
+21212d2c462346608a8a462320468a608a61b8ff8062232010238ab902c202c28a70456020b0
+005058b00161b8ffba8b1bb0468c59b0106068013a2d2cb1020042b123018851b1400188535a
+58b910000020885458b202010243604259b12401885158b920000040885458b2020202436042
+b12401885458b2022002436042004b014b5258b2020802436042591bb940000080885458b202
+040243604259b94000008063b80100885458b202080243604259b94000010063b80200885458
+b202100243604259b94000020063b80400885458b202400243604259595959592d2cb0024354
+584b53234b515a58381b2121591b21212121592d00000001000000053333db0837495f0f3cf5
+081b080000000000a2e3272a00000000cf756123faaffd671000080c00000009000100010000
+000000010000073efe4e00431000faaffa7a100000010000000000000000000000000000002a
+0600010005c7006604730044047300870400003f0239002402aa008504730083040000500239
+0000047300860473004b0473004a040000880473004604730042047300870473008702390013
+06aa00870556005c01c700830556fffd01c7008802aa007c0556009606aa009802aa007c0556
+009e040000280639006d05c7009e0400001a05560009023900bf04e300a8055600a201c7006c
+04e30030040000210473005a02aa0041000000000000002c000001680000032c000004f40000
+089c000009c000000ac800000c6800000e1c00000e1c00000ffc000011bc0000149000001728
+000018dc00001acc00001c9000001e3400001f88000021b800002464000025600000270c0000
+28000000288000002a1800002d4400002dec00002ef4000030dc0000326c000033b4000035d4
+000037480000382c000038e4000039a800003a4c00003afc00003d5400003e4800003e900001
+0000002a00f2003c009d000700020010002f0056000004ac041d00050002b900540328b32618
+1fd0bc032900e0032900020329b22b1d1fb90327031db23b1f40b80323b31215320f412d0320
+0001002f0320000100200320006f032000af032000bf03200004005f031e00010010031e007f
+031e0080031e00af031e00bf031e00d0031e00060000031e0010031e0020031e006f031e009f
+031e00e0031e0006031d031cb2201f1041270319007f03190002000f031700ef031700ff0317
+0003001f0317002f0317004f0317005f0317008f0317009f03170006000f0317005f0317006f
+0317007f031700bf031700f00317000600400317b2923340b80317b28b3340b80317b36a6c32
+40b80317b2613340b80317b35c5d3240b80317b357593240b80317b34d513240b80317b34449
+3240b80317b23a3340b80317b331343240b80317b32e423240b80317b3272c3240b80317b312
+253280b80317b30a0d32c04116031600d00316000200700316000102c4000f0101001f00a003
+1500b0031500020306000f0101001f00400312b32426329fbf03040001030203010064001fff
+c00301b20d1132410a02ff02ef0012001f02ee02ed0064001fffc002edb30e11329f414a02e2
+00af02e200bf02e2000302e202e202e102e1007f02e00001001002e0003f02e0009f02e000bf
+02e000cf02e000ef02e0000602e002e002df02df02de02de000f02dd002f02dd003f02dd005f
+02dd009f02dd00bf02dd00ef02dd000702dd02dd001002dc0001000002dc0001001002dc003f
+02dc000202dc02dc001002db000102db02db000f02da000102da02daffc002d3b2373932b9ff
+c002d3b22b2f32b9ffc002d3b21f2532b9ffc002d3b2171b32b9ffc002d3b2121632b802d2b2
+f9291fb90326031cb23b1f40bb0322003e00330322b225311fb80318b23c691fb802e3b3202b
+1fa0413002d400b002d40002000002d4001002d4002002d4005002d4006002d4007002d40006
+006002d6007002d6008002d6009002d600a002d600b002d60006000002d6001002d6002002ca
+002002cc002002d6003002d6004002d6005002d6000802d0b2202b1fb802cfb226421f411602
+ce02c70017001f02cd02c80017001f02cc02c60017001f02cb02c50017001f02c902c5001e00
+1f02ca02c6b21e1f00410b02c6000002c7001002c6001002c7002f02c5000502c1b324121fff
+411102bf0001001f02bf002f02bf003f02bf004f02bf005f02bf008f02bf000602bf0222b264
+1f12410b02bb00ca0800001f02b200e90800001f02a600a20800406a1f402643493240204349
+3240263a3d3240203a3d329f209f26024026969932402096993240268e923240208e92324026
+848c324020848c3240267a813240207a813240266c763240206c76324026646a324020646a32
+40265a5f3240205a5f3240264f543240204f5432b8029eb724271f374f6b0120410f02770030
+02770040027700500277000402770277027700f90400001f029bb22a2a1fb8029a402b292a1f
+80ba0180bc0180520180a201806501807e01808101803c01805e01802b01801c01801e018040
+0180bb0138000100800140b40180400180bb013800010080013940180180ca0180ad01807301
+8026018025018024018020013740b80221b2493340b80221b2453340b80221b341423240b802
+21b33d3e320f410f0221003f0221007f0221000300bf022100cf022100ff0221000300400221
+b320223240b80221b3191e3240b80222b32a3f3240b80221b32e3a326f414802c3007f02c300
+8f02c300df02c30004002f02c3006002c300cf02c30003000f02c3003f02c3005f02c300c002
+c300ef02c300ff02c3000600df02220001008f02220001000f0222002f0222003f0222005f02
+22007f022200ef0222000600bf022100ef02210002006f0221007f022100af02210003002f02
+21003f0221004f0221000302c302c30222022202210221401d101c102b1048038f1c010f1e01
+4f1eff1e023700161600000012110811b8010db6f70df8f70d00094109028e028f001d001f02
+90028f001d001f028fb2f91d1fb80198b226bb1f41150197001e0401001f013900260125001f
+013800730401001f0135001c0801001f0134001c02ab001f0132b21c561fb8010fb2262c1fba
+010e001e0401b61ff91ce41fe91cb80201b61fe81cbb1fd720b80401b21fd51cb802abb61fd4
+1c891fc92fb80801b21fbc26b80101b21fba20b80201b61fb91c381fadcab80401b21f8126b8
+019ab21f7e26b8019ab61f7d1c471f6b1cb80401b21f6526b8019ab21f5e73b80401400f1f52
+265a1f481c891f441c621f4073b80801b61f3f1c5e1f3c26b8019ab21f351cb80401b61f301c
+bb1f2b1cb80401b61f2a1c561f291cb80101b21f231eb80401b21f5537b80168402c07960758
+074f07360732072c0721071f071d071b071408120810080e080c080a08080806080408020800
+0814b8ffe0402b00000100140610000001000604000001000410000001001002000001000200
+000001000002010802004a00b013034b024b5342014bb0c063004b6220b0f65323b8010a515a
+b005234201b0124b004b5442b0382b4bb807ff52b0372b4bb007505b58b101018e59b0382bb0
+0288b801005458b801ffb101018e851bb0124358b900010111858d1bb900010128858d595900
+1816763f183f123e113946443e113946443e113946443e113946443e11394660443e11394660
+442b2b2b2b2b2b2b2b2b2b2b182b2b2b2b2b2b2b2b2b2b2b182b1db0964b5358b0aa1d59b032
+4b5358b0ff1d594bb09353205c58b901f201f04544b901f101f045445958b9033e01f2455258
+b901f2033e4459594bb8015653205c58b9002001f14544b9002601f145445958b9081e002045
+5258b90020081e4459594bb8019a53205c58b9002501f24544b9002401f245445958b9090900
+25455258b9002509094459594bb8040153205c58b173244544b1242445445958b91720007345
+5258b9007317204459594bb8040153205c58b1ca254544b1252545445958b9168000ca455258
+b900ca16804459594bb03e53205c58b11c1c4544b11e1c45445958b9011a001c455258b9001c
+011a4459594bb05653205c58b11c1c4544b12f1c45445958b90189001c455258b9001c018944
+59594bb8030153205c58b11c1c4544b11c1c45445958b90de0001c455258b9001c0de0445959
+2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b
+2b2b2b65422b2b01b33b59635c456523456023456560234560b08b766818b080622020b16359
+4565234520b003266062636820b003266165b059236544b063234420b13b5c4565234520b003
+266062636820b003266165b05c236544b03b2344b1005c455458b15c406544b23b403b452361
+4459b347503437456523456023456560234560b089766818b080622020b134504565234520b0
+03266062636820b003266165b050236544b034234420b147374565234520b003266062636820
+b003266165b037236544b0472344b10037455458b137406544b24740474523614459004b5342
+014b5058b108004259435c58b108004259b3020b0a124358601b2159421610703eb0124358b9
+3b21187e1bba040001a8000b2b59b00c2342b00d2342b0124358b92d412d411bba0400040000
+0b2b59b00e2342b00f2342b0124358b9187e3b211bba01a80400000b2b59b0102342b0112342
+002b7475737500184569444569444569447373737374757374752b2b2b2b74752b2b2b2b2b73
+7373737373737373737373737373737373737373737373732b2b2b45b0406144737400004bb0
+2a534bb03f515a58b1070745b040604459004bb03a534bb03f515a58b10b0b45b8ffc0604459
+004bb02e534bb03a515a58b1030345b040604459004bb02e534bb03c515a58b1090945b8ffc0
+6044592b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b752b2b2b2b2b2b2b435c58b9008002bbb3
+01401e017400735903b01e4b5402b0124b545ab012435c5a58ba009f02220001007359002b74
+73012b01732b2b2b2b2b2b2b2b737373732b2b2b2b2b002b2b2b2b2b2b004569447345694473
+456944737475456944734569444569444569447374456944456944732b2b2b2b2b732b002b73
+2b74752b2b2b2b2b2b2b2b2b2b2b2b2b2b737475732b7374757374752b2b2b742b2b000000>
+] def
+/f-0-0 currentdict end definefont pop
+%%EndResource
+%%BeginResource: font Arial-BoldMT
+11 dict begin
+/FontType 42 def
+/FontName /Arial-BoldMT def
+/PaintType 0 def
+/FontMatrix [ 1 0 0 1 0 0 ] def
+/FontBBox [ 0 0 0 0 ] def
+/Encoding 256 array def
+0 1 255 { Encoding exch /.notdef put } for
+Encoding 32 /space put
+Encoding 49 /one put
+Encoding 50 /two put
+Encoding 78 /N put
+Encoding 80 /P put
+Encoding 89 /Y put
+Encoding 97 /a put
+Encoding 101 /e put
+Encoding 104 /h put
+Encoding 115 /s put
+Encoding 133 /ellipsis put
+/CharStrings 12 dict dup begin
+/.notdef 0 def
+/ellipsis 1 def
+/P 2 def
+/h 3 def
+/a 4 def
+/s 5 def
+/e 6 def
+/space 7 def
+/one 8 def
+/two 9 def
+/Y 10 def
+/N 11 def
+end readonly def
+/sfnts [
+<00010000000900800003001063767420fb3ea3da00000fd40000075a6670676d08e8ba280000
+1730000005d7676c7966468a4ff10000009c00000f3868656164e4841a9700001d0800000036
+68686561127e08d500001d4000000024686d74783bcf051400001d64000000306c6f63610000
+59e400001d94000000346d61787007f5047600001dc80000002070726570f14ae51600001de8
+000011d200020100000005000500000300070042b40201fe0607b8023f4013000504fe03000a
+0704fe010019080605fe0203bc0126000901b0011800182b10f63cfd3c4e10f43c4dfd3c003f
+3cfd3c10fc3cfd3c3130211121112521112101000400fc2003c0fc400500fb002004c0000003
+00c900000738011900030007000b003e402406050201040a380b0b07070404000a022601f207
+2605f20b26200840080208190ccdab182b4e10f45d4dfdf6fdf6ed003f3c103c103c10ed1732
+3130211121112111211121112111061e011afc3c0119fc3c01190119fee70119fee70119fee7
+00020095000004f805ba000f001b007740250605b914b918034705016705d605021211250d0e
+0e001b10250201020f00081627af070107b8ffc0b3090b3407b8028c40161f1d301d601d701d
+801d051d100f2001200030000200b8028bb31c3153182b4e10f45d3c4dfd3c4d1071f62b714d
+ed003f3c3f3cfd3c12392f3cfd3c3130005d71015d3311212017161615140606070623231902
+333236363534262726239501db010e527eaa62974e6ac9c1a2af76435e4835a005ba1621ddaf
+87b8691115fdd704c2fe602e624150680d0a000100920000045905ba001600b2402b0f011f01
+39013302331042014211de01f901090705160524025811681105010102131411121303140201
+0fb80274401d0307090a0a14150a1600000b0a26080940202434af0901ff0901091a18b8ffc0
+40162224349018a018027018f01802ef1801180014261615b8ffc0400f202434a01501f01501
+1519173f3c182b4e10f471722b3c4dfd3c4e105d71722bf671722b3c4dfd3c003f3c3f3c3c10
+3c3fed3939111739030e103c083c3130015d005d01113633321e021511211134262623220606
+1511211101ab88bd619c4f1dfee720513d466e33fee705bafde59f4870888ffd910231a75a35
+448986fdec05ba0000020049ffe8042e043e0023003201714068071a081c051d161a4a1b481c
+4925db10df11093619461957266619672686269219931aa61ab91bc71ac81b0c06060d151606
+191627062915591977028602a606b506c6060cbf34d910021d243231112c0d24402b2e342440
+2228342440191d346f24fc240224461db8ffc040300e0f343d1d01001d101db01df91d041d1d
+2c013300400e0f340f001f0002005521401c113f21401b103f2140181a3421b80274b504070c
+0d0a2cb8ffc0b31c113f2cb8ffc0b31b103f2cb8ffc0b3181a342cb802744040140b1e312608
+2909280d591f0c9f0c021f0c01ff0c010c400e16340c1a4f340134600001008e300101013329
+215f1701df17014f175f176f17031719336941182b4e10f45d71724dedf471ed5d4e105df62b
+5d71724dedf4e4fd3c003fed2b2b2b3f3c3ffd2b2b2bf45d2be412392f5d712bb10602435458
+b22f1d017159edb10602435458b90024ffc0b71b1d3454246424025d2b595d2b2b2b11123903
+0e103c3c3c313001715d005d7101273636333216161503141617212627262706062322263534
+36363736373534262322060106060706151416333237363736350165ff2bd2cfbcb84b031b25
+feea0b10070348a45da4bd569b92c54c506f4b54015e36ea243758444c4533100b02e22e9a94
+5989b7feb88c854c1c3719084646b2885a8d4b1c25201c51453bfed2123218273c3b56322637
+2465000000010030ffe80410043e002a02c240c006110623082717111723981298149727952a
+09071446140212bb0db90ec721e523f80df62206290d550d650d950b9712a722b90c07412340
+2444266722642687128714862283240937264506460b4a0d4f0f462142220722242726370c35
+2135223523352407060a05110921180d270c22222223072422402c730c781479157629752a88
+15842a9a15952ab422b4230d80018f178c18992aa92ab02c061721164021233416401c1f341f
+1601df16011633082150258f25022540181d34251a2cb8ffc04017110a3f502c01302c012f2c
+012c1e213010011033012100b8ffc0b30f093f00b8ffc0b3110a3f00b8ffc04009090d340019
+2b78b8182b4e10fc2b2b2b4dedf471ed4e105d71722bf62b714dedf471722b2bed00b1060243
+54584035060101060116012622362146215401591764016917f6010a011702041a2122022813
+0d0c02281a5f04010446280b501a011a4613073ffd5d3ffd5d1112173911121739111217395d
+711bb90022ffcbb3282a3421b8ffcbb3282a3422b8ffe0b31e243421b8ffe0b31f243422b8ff
+e0b3191a3421b8ffe0401b191a346b0d0136224622980d9422c422d4220621220c0d04041a00
+b8ffc0b5191b34003301b8ffc0b3172d3f01b8ffb0b3090a3e01b8ffc0b322253401b8ffc040
+1d1a1c3400013001400150010460018001f0010300011001500160010401b8ffc0b313163401
+b80101404d0004015f04f004020446280b1640191b3416331740172d3f1740090a3e17403537
+3417402b2e34174025293417401a1c340f171f175f176f170417551a402224340f1a01501aff
+1a021a4613073ffd5d712bf45d2b2b2b2b2b2be42b3ffd5d71f42b5d71722b2b2b2be42b1112
+17395d712b2b2b2b2b2b59313001715d00717171715d5d435c58b90024ffc940090b123f0f28
+0b123f21b8ffecb60d390c140c3921b8ffecb20c3922b8ffeab10b39002b2b2b2b012b2b5900
+715d132516163332373635342726272427263534363332161705262623220706151417160417
+1615140623222630011a126e636d3725141549feac5b7edae5dad428fef7115f586f30201c26
+01c15958f4efd9fd012f2b5255281c2f201514114b3e56998abc8e8b313e421f16231e151c66
+4a4b8692d2b0000000020041ffe80427043e0014001c01a3b90010fff840460b3999099a0d96
+10a805a70abb09bb0db81a0808140148024706460a4f1ea80db606b61ac70ac80cd60ad80cf8
+07f70d0d1c0f1c15401b1d3415400e11340f15bf15cf150315b8ffc0b30f1e3f15b8ffc0b30e
+173f15b8028d400c0f0e12500e600e020e181201b8ffc0b6191b3401331200b8ffc0b31d2034
+00b8ffc0b322293400b8ffc0b32b2d3400b8ffc0b3181c3400b8ffc0400f0e0f34a000010000
+100002005f1212b80274b3040b1218b8027440240b070021012f15214f0e010e1a2f1e5f1e6f
+1e9f1e041e0f2108400d0f3408191d6941182b4e10f42b4ded4e105df65d4dedf4ed003fed43
+5c584014184028143f18401e0f3f18401b103f18401c113f2b2b2b2b593ffd435c58b90012ff
+c0b328143f12b8ffc0b31e0f3f12b8ffc0b31b103f12b8ffc0b21c113f2b2b2b2b59f45d712b
+2b2b2b2b435c58b90000ffc0b2123900b8ffc0b2173900b8ffb0b3090a3e00b8ffc0b241213f
+2b002b2b2b59e42b1112395d435c5840140e400f1e3f0e401c113f0e401b103f0e400e173f00
+2b2b2b2b592f3cfd2b2b722b2b3c0111333130015d71005d2b01050606232027263510003332
+0003211616333236132626232207061702fa011836e9affeeb85690114d3ed011206fd400382
+61425a270378565c3c3c0101522f9aa1b591dd0108012bfec7febd7d8b48016c7a7f43437300
+000100a20000032605c00009005640096b027b028b02030204b80129b35f050105b8025eb708
+090501000c0900bb015800020001025d400d0500041f042004b0040404190aba01a701a00018
+2b4e10f45d3c4df63cfd3c003f3c3f3cf45ded393130005d212111060735362437330326fee7
+9ad16e010230e404239045ff24c986000000000100330000040c05c0001d013f405fb518b61a
+b91bca04c718d018d019d01a08431b431c431d56199b049518aa04a61c08061a20002806371a
+480443184319431a0824182419241a03162604560488189c1b9c1c9c1daa1caa1d0812001d10
+1d201d311d761d841d901dd61d081db8ffc040161415341d02100c0f1d100020000220003000
+40000300b8ffc0b312163400b802a1b302010c0fb8015640231f0c2f0c020c401216340ca613
+0509d81616014f0001001a1f0fd8107702191ed3c2182b4e10f44df4ed4e10f6713c3c4d10ed
+003ffd2b71e43f3cfd2b5d713c1112390111332b5d435c58b9001dffc0b211391db8ffc0b20f
+391db8ffc0400e10390408103905081139040811392b2b2b2b2b2b59b10602435458400b091b
+191b021b1301041300001112391112395d593130015d4b5158bd001bffe0001cffe0001dffe0
+3838385901715d5d5d0111213612373637363534262322060725362433321615140607060406
+07040cfc2710a0ecbe2b3a6559586808fee8190108c6d9f8474d33fef647160105fefb940109
+dbb13f57555e656a7b1ce8caeaae63b36241f450260000000001fffd0000055805ba000800c5
+b90004ffbe403e0b35c00a010403040503070405040305010403040503200201140202010405
+04030520060714060607010407030608070401030203090400050a08015607b80267400b0606
+05050303020200080abb0217000800060217b5070708200002b80217b70101200030000200b8
+0266b3095e63182b10f65d3c1910e41810fd3c1910e41810e4003f3f3c103c103c10f6e40111
+123912391139001117391112173987052e2b087d10c487052e182b087d10c40708103c870810
+c43130015d002b2111012101012101110216fde7015b015901520155fde502690351fdbc0244
+fcadfd990000000100980000052305ba000901ce400e0903060819031708041208020303b8ff
+00b3120b3f03b8ffc0b35b5d3403b8ffc0402a53543403320708140707080308020207030904
+02020907080304405b5d340440535434043206cf050105b8028b4019400b500b600b03700b80
+0b02a00bc00b02200b300b020b0809b8ffc0b35b5d3409b8ffc0400e53533409320120003000
+c0000300b8028bb30a3175182b4e10f45d3c4dfd2b2b3c4d105d5d7172f65d3c4dfd2b2b3c00
+3f3c3f3c011112393900123939872e2b2b2b2b877dc4b10602435458b90003ffe040090e2734
+08200e273403b8ffc0b7090d340840090d34002b2b2b2b5931304358b90003ff80b60b350880
+0b3503b8ffc0403d1a2e3408531a2e34050316033203400304460385089008a008b208e40306
+c403cf08da080320032f0834033b084f0892039f08a003af08b003bf080b07b8ffc040093335
+34024033353407b8ffe0400d2f323402202f32340207143507b8ff974009212e340254212e34
+07b8ffc040461e203402541e20340802070718020317072c0227073b0233074e0240075c0256
+070914021b074d0245079a07ab07cb02d902e802e707f9020b270228074a0778078807ac0206
+015d7172722b2b2b2b2b2b2b2b2b005d5d71722b2b2b2b59005d331121011121112101119801
+2002580113fed7fdb105bafc2d03d3fa4603bcfc4400000005ba001c05ba001c05a7001c0426
+001c0000ffe40000ffe40000ffe4fe69ffe405ba001cfe69ffe402ea0000011d0000011d0000
+00000000000000b200ac00d70128012000b301fa001700f8011901310049000400f7000300af
+00fd00950014005400960112002400160055004901040119012b008c019bff76ffe9003d0092
+00a2ffb70182ffaa0016008f00c600f8001c00de04010037004e00550055006500e903e50059
+ff9a00080087000b003b00520116006100d600d600f500000093009400be017cfff800040014
+00820092003c00410041ffc1fffc002a008c049005d809b5009100bb0106ff63ff69001e0022
+008a022bffd6ffdf0026005900a300ac0104012b01c004480021006b00850098011903c6006b
+009500a400fe010c025d034305bf000000490056006e0077008a00aa00ca0112015005d805f0
+ff7bffe70006001300280061006900e90135014d02a5040cff3effda005b00b900c801190119
+011901c0045b04a7055bfe3fff9dffc2001500b7010a01bc01c10532058efd81ffa1ffae000c
+00260031003d004e00560062008300c100c900f100f2027fff7f00480053007700c5011d0120
+0126012801d60219027e027e03d3002e0041005d006b0075009f00b000b200ba00bb00bd00d6
+00db00e000e50114011b014a0162019101f2020c026402cf039b03b403d4040104a900160023
+0025002a007400a500b600cc00cd00cf0105012001300150016a016f0197019d01e002b002ec
+02f70408048304fb04fd0526fee0fefbff4efff50018001a004c007a007f009100a300b300b4
+00ce00d500f200f300f601100138016801a101b001e001ec02090222024f0270029602a502ad
+034e039103c104350442046b04cd04da0586058b076107fefca6fe93feadfed1ffb7ffd10003
+000e00180026004600690081008f00a500bf00d300d500d900dd00e20119012b0138013b015a
+015e016801730188019401ad01c501d101ea01f20200020002000222023b0244024f026f0272
+027e02820293029402a502cf02cf02d002da02dd02eb02f5030503220336037103a103b003b8
+03d003e604100426042e0431044f045a04ff053205320547055305a805ab05c205f0063c0664
+067006e80782078408ccfd2afddefe00fe68feb0feb3ffaa00080059007a0091009e00a200af
+00b400bb00ca00cc00ce00d900e000f40114011a01210127012b01390146014b014d0157015c
+01650182018701920198019b01a201ae01c501c501d102070222022b02410253026102650284
+0287028d02b402b402ba02c902d602d802ed02f503170323032b03310349035a035b036e0371
+0374037e03840391039103aa03cf03d303e703e803ed04080417041e0475047a049904a704b4
+04d1054c056d056d05a205bf05c005d105fc05fc0602061a061c062f066a06a806e207060736
+0750078907d407f30870011c012a011a01200000000000000000000000000219000b001e02aa
+0214047f01ed0000001d0104000f0091002b01880153011201f3003f03fe0168010e047f01ed
+036e03150219041300000000064004b00000027401bb003501c5007f06020301000004e000b2
+01dc02e004c3023d00d50160011904a7036e05ca022100ab0426009002bc02bb014200b4023c
+0256029c030001e501a800e5006b00780094016b017300ab01ed013a017d0137017f00d40216
+03530184003cffa202040109014901f0006e031500810464005e000000000000000000000000
+000000000000000000000000000000000000013900dc00e9fe9e040d047c012b00b800960059
+00ac00df01a900fa0105ffec00170003005500610004008c00a3008500280120005d00d6007f
+012601190104016c06cf00b4010600000737063e047a00f000f900e905ba042604420000ffe7
+fe69049e04e3ff37ff2d01200105012000a800740068004700f200e500d900bd00a800680047
+005c0048000a0028003200410050005a0064007d00870091ffb0ff9cff83ff79ff6f00cb0120
+00fa012c01fa01a000d500b8005c003c00c800c8008f00d9018b00b300470000000000000000
+0000000000000000000000000000000000000000000000000000000000000000000000000000
+0000000000000000fe6400c000ea01180125013203b003ed0576059005aa05b405be05cd0631
+00780084009b00cc00e200f4010a0120016300d100ea00f7010801420019002c003400410038
+00480058006c025903bd0043011a007000d30028003700420050005a0064007300780082008c
+009c00a500bd00ce00f00110015c00be00d801020117012c016300ea01080041004b0055005f
+007300a60109018301b300410064001e002a00eb00fa010e01380274002c00400082009600b6
+00c000cc00dc00e600f000ff010a0120012c013b01440156016300f700570064011001360050
+01b10000ffb60039004e004403cc00e5002401100042012201a400f0006000e0000e001d0039
+05e30102002cfe4eff38026903bd011600ff000e00a00054001b003d01710041000f005000fd
+0015014f0035fe52002c00d3010304b001d200b600c000990265ff870377fe6c00cb00a9005c
+0040047607440000404154403f3e3d3c3b3a3938373534333231302f2e2d2c2b2a2928272625
+24232221201f1e1d1c1b1a191817161514131211100f0e0d0c0b0a090807060504030201002c
+4523466020b02660b004262348482d2c452346236120b02661b004262348482d2c45234660b0
+206120b04660b004262348482d2c4523462361b0206020b02661b02061b004262348482d2c45
+234660b0406120b06660b004262348482d2c4523462361b0406020b02661b04061b004262348
+482d2c0110203c003c2d2c20452320b0cd442320b8015a51582320b08d44235920b0ed515823
+20b04d44235920b09051582320b00d44235921212d2c20204518684420b001602045b0467668
+8a4560442d2c01b10b0a432343650a2d2c00b10a0b4323430b2d2c00b0172370b101173e01b0
+172370b10217453ab10200080d2d2c45b01a234445b01923442d2c2045b00325456164b05051
+5845441b2121592d2cb00143632362b0002342b00f2b2d2c2045b0004360442d2c01b00643b0
+0743650a2d2c2069b04061b0008b20b12cc08a8cb8100062602b0c642364615c58b00361592d
+2c45b0112bb0172344b0177ae4182d2c45b0112bb01723442d2cb01243588745b0112bb01723
+44b0177ae41b038a45186920b01723448a8a8720b0c05158b0112bb0172344b0177ae41b21b0
+177ae45959182d2c2d2cb0022546608a46b040618c482d2c4b53205c58b002855958b0018559
+2d2c20b0032545b019234445b01a23444565234520b00325606a20b009234223688a6a606120
+b01a8ab000527921b21a1a40b9ffe0001a45208a54582321b03f1b235961441cb114008a5279
+b31940201945208a54582321b03f1b235961442d2cb110114323430b2d2cb10e0f4323430b2d
+2cb10c0d4323430b2d2cb10c0d432343650b2d2cb10e0f432343650b2d2cb11011432343650b
+2d2c4b525845441b2121592d2c0120b003252349b04060b0206320b000525823b002253823b0
+02256538008a63381b212121212159012d2c4bb06451584569b00943608a103a1b212121592d
+2c01b005251023208af500b0016023edec2d2c01b005251023208af500b0016123edec2d2c01
+b0062510f500edec2d2c20b001600110203c003c2d2c20b001610110203c003c2d2cb02b2bb0
+2a2a2d2c00b00743b006430b2d2c3eb02a2a2d2c352d2c76b8023623701020b802364520b000
+5058b00161593a2f182d2c21210c6423648bb84000622d2c21b08051580c6423648bb8200062
+1bb200402f2b59b002602d2c21b0c051580c6423648bb81555621bb200802f2b59b002602d2c
+0c6423648bb84000626023212d2cb4000100000015b00826b00826b00826b008260f10161345
+683ab001162d2cb4000100000015b00826b00826b00826b008260f1016134568653ab001162d
+2c4b53234b515a5820458a60441b2121592d2c4b545820458a60441b2121592d2c4b53234b51
+5a58381b2121592d2c4b5458381b2121592d2cb0134358031b02592d2cb0134358021b03592d
+2c4b54b012435c5a58381b2121592d2cb012435c580cb00425b00425060c6423646164b00351
+58b00425b00425012046b01060482046b0106048590a21211b2121592d2cb012435c580cb004
+25b00425060c6423646164b807085158b00425b00425012046b8fff060482046b8fff0604859
+0a21211b2121592d2c4b53234b515a58b03a2b1b2121592d2c4b53234b515a58b03b2b1b2121
+592d2c4b53234b515ab012435c5a58381b2121592d2c0c8a034b54b00426024b545a8a8a0ab0
+12435c5a58381b2121592d2c462346608a8a462320468a608a61b8ff8062232010238ab902a7
+02a78a70456020b0005058b00161b8ffba8b1bb0468c59b0106068013a2d2cb1020042b12301
+8851b1400188535a58b910000020885458b202010243604259b12401885158b9200000408854
+58b202020243604259b12401885458b2022002436042004b014b5258b2020802436042591bb9
+40000080885458b202040243604259b94000008063b80100885458b202080243604259b94000
+010063b80200885458b2021002436042595959592d0000010000000533330f4aa24a5f0f3cf5
+0819080000000000a2e33c1d00000000cf756127fafafcfd1000082400010009000100010000
+000000010000073efe4e00431000fafafa7a100000010000000000000000000000000000000c
+06000100080000c90556009504e3009204730049047300300473004102390000047300a20473
+00330556fffd05c70098000000000000006c000000e0000001b0000002b0000004c000000808
+00000a1400000a1400000a9800000c4000000d3800000f3800010000000c00f2003c008f0006
+00020010002f00550000073c02c200050002b15480410d03ac00af03ac0002001003ac002003
+ac00a003ac0003004003acb3070d3240b803acb312143240b803acb2162b32b9ffc003acb23a
+3340b803acb32d943280bc03ab005f0033ffc003abb2553340b803abb340443240b803abb333
+3b3240b803abb32f313240b803abb2083340b803abb30714321f411a03ab002f03ab0002000f
+03ab002f03ab004f03ab008f03ab009f03ab00bf03ab0006001003ab00df03ab00ff03ab0003
+03a803a2b2461f40b803a5b208330f411403a50001004003a500cf03a500ff03a50003002003
+a500af03a500ef03a500ff03a50004ffc003a3b3090c3240b803a3b208330f411b03a3000100
+0f03a3001003a3008003a300af03a300cf03a30005006f03a3009f03a300ff03a30003009f03
+a200af03a2000203a203a1b2101f10410a039e007f039e0002039a000f0101001fffc00398b3
+10143240b80399b30f1332404110039500500395000200b0034d00c0034d0002006f0391007f
+03910002ffc0034bb22d3132b9ffc0034bb30a0e32104110038b0020038b0080038b000300a0
+038b00010020038b0040038b0002ffc0038bb313163240b80383b20f1132b9ffc0037bb23034
+32b9ffc0037bb3101832504114037800010365036e0023001f037e036e001e001f0363036e00
+1d001f03620364000d001fffc00340b30f1032804110033f0001033f03150029001f03410316
+0032001f0344031a001b001fffc00375b20e1132b9ffc00375b2282a32410a03430318003200
+1f030f030d0034001f03080307b2321f20bb0340000100400388b3090b3240b80388b2101532
+bd038503070014001f03800307b2171f0fbd030a002f030a0002ffc00354b3090d3290410c03
+5400a003540002001f036e0001009f036e00010040036eb2090b3241110345031c0016001f03
+6b031d0015001f0346031e0015001f03a703a10046001f039db3261c1fc0bb03930001004003
+92b3090d3240b8033eb2083340b8033eb30d0e32c04109033e000100b0038e00c0038e0002ff
+c00390b326383200412603280030032800020020037f0030037f00020010038a0030038a0050
+038a006f038a007f038a009f038a000600000389003003890002002f037a0070037700900377
+009f037a0004ffc00315b20f1032b9ffc00315b2242832b903190318b2321f10bb031a0001ff
+c0031ab3090e3240b80318b2121332b9ffc00318b30c0e323fbd0373004f0373000200400374
+b31718326fbb032a00010040032cb3181b3240b80370b2090c32bd031703160032001fffc003
+16b20e1132bd031c031e0016001f031d031eb2151fb0411f031e0001000f031f000102ca02d0
+0015001f02d302d5000d001f02cf02d0000d001f02cb02d0000d001f02cd02d0000d001f02ce
+02d0000d001fffc002d0b3090c3240b802d2b3090c32e0411c02e50001005f02dd009f02e500
+0202bb02c30030001f02da02b80032001f02d902b9003f001f02d802b80064001f02b902b800
+33001f02bab221c81fb802b8b321c81f40b8039bb20d1632b9ffc002c3b22b2f32b9ffc002c3
+b21f2532b9ffc002c3b2171b32b9ffc002c3b2121632412502c202c1001c001f02d702c10024
+001f02c102c00022001f02bf02c00018001f02c0027400c8001f02b50235003b001f02b40235
+003b001f02c402bc001e001f02b702b60038001f02b3b20ec81fb802b0b207c81fb802afb206
+c81fb802aeb200c81fb802afb2502f1fbc02ae02ab001a001f02adb2261a1fb802a8b326241f
+0fbb0235000102a50274b21d1f12410a02a1015801f4001f02a000d801f4001f001202a2b237
+c81fb80290b2bc201fb9029002904018374025402d40a6033025302d30a6032025202d203720
+a6204110028e0005009f028b0001028b028b003700200289003002890040028900900289b204
+37b041fd027400c0027400020080027400a00274000200600274007002740002000002740010
+027400020080027400f002740002003f0285004f028500020090027e0090027f009002800090
+028100040090027a0090027b0090027c0090027d000400900274009002750090027700030070
+027e0070027f007002800070028100040070027a0070027b0070027c0070027d000400700274
+007002750070027700030060027e0060027f006002800060028100040060027a0060027b0060
+027c0060027d000400600274006002750060027700030050027e0050027f0050028000500281
+00040050027a0050027b0050027c0050027d000400500274005002750050027700030040027e
+0040027f004002800040028100040040027a0040027b0040027c0040027d0004004002740040
+02750040027700030030027e0030027f003002800030028100040030027a0030027b0030027c
+0030027d000400300274003002750030027700030020027e0020027f00200280002002810004
+0020027a0020027b0020027c0020027d000400200274002002750020027700030010027e0010
+027f001002800010028100040010027a0010027b0010027c0010027d00040010027400100275
+00100277000300e0027e00e0027f00e0028000e00281000400e0027a00e0027b00e0027c00e0
+027d000400e0027400e0027500e00277b103d041c5027e00d0027f00d0028000d00281000400
+d0027a00d0027b00d0027c00d0027d000400d0027400d0027500d00277000300300274004002
+74000200c0027e00c0027f00c0028000c00281000400c0027a00c0027b00c0027c00c0027d00
+0400c0027400c0027500c00277000300b0027e00b0027f00b0028000b00281000400b0027a00
+b0027b00b0027c00b0027d000400b0027400b0027500b00277000300a0027e00a0027f00a002
+8000a00281000400a0027a00a0027b00a0027c00a0027d000400a0027400a0027500a0027700
+030090027e0090027f009002800090028100040090027a0090027b0090027c0090027d000400
+900274009002750090027700030020027e0020027f002002800020028100040020027a002002
+7b0020027c0020027d00040020027400200275002002770003028101580801001f0280012908
+01001f027f00ec0801001f027e00d80801001f027d00b10801001f027c00a60801001f027b00
+820801001f027a00370801001f027700260801001f027500200801001f0274001f0801b21f37
+0f41160235004f0235005f0235006f0235009f023500af023500bf0235000700af023500cf02
+3500df023500ff02354022040f074f079f07af07bf0705af07e007020f064f069f06af06bf06
+05af06e0060220411b020d0001005f02350001008f02350001007f023500ef02350002002f02
+35003f02350002003f0234004f0234000202350235023402344011ed20ef2a01cf2a01bf2a01
+af2a018f2a01410902470104001e001f022000370201001f0158400c263e1fd8263e1f372627
+3e1fb8028eb6ec171fb226361fb801bcb226361fb80129402b26361fec26361fb126361fa626
+361f8226361f3726361f3226361f2d26361f2526361f1f26361f37262a1fb801584022263e1f
+d8263e1fbc263e1f27263e1f21263e1f20263e1f3700161600000012110840b9020d01a6b3c5
+0d0009b801bcb227281fb801bbb227301fb801b8b2274f1fb801b7b227621f410901b6002701
+01001f01b5002002ab001f01afb21fe41fb801adb21fe41fb801acb21fbb1fb801a8b21f341f
+b8015db2272e1fb8015bb227cd1f410d0155001f0401001f0154001f0401001f0153001f0201
+001f0152b21f561fb80151b21f291fb8012bb227261f410d012a00270125001f0129015800e4
+001f0125001f0401001f0124b21fe41fb80123b21f3b1fb80122b21f391f410d010800270801
+001f0106002d0101001f0105001f0101001f0103b31fbb1fefb901580401400b1fed1f931fec
+1fe41feb1fb80201b21fd920b80401b21fcf25b80156400a1fbc2d9e1fbb1f411fb2410a0158
+0401001f00b101580401001f00b001580401b51fa625891f9bb901580125b61f991f2e1f8e2d
+b80801b51f8d1f291f89b901580401b21f8220b802ab40131f801f301f742de41f731f4a1f61
+1f521f5d25b802abb21f5c1fbc0801001f0059015802abb61f5025891f491fb80125b21f4725
+b80401400b1f461f791f401f271f3920bc02ab001f003801580401b21f372dbc0125001f0032
+01580125b61f2c1f341f2a25b80801b21f5537b80111402a07f00790075b0742073b07230722
+071e071d071408120810080e080c080a080808060804080208000814b8ffe0402b0000010014
+0610000001000604000001000410000001001002000001000200000001000002010802004a00
+b013034b024b5342014bb0c063004b6220b0f65323b8010a515ab005234201b0124b004b5442
+b0382b4bb807ff52b0372b4bb007505b58b101018e59b0382bb00288b801005458b801ffb101
+018e851bb0124358b10100858d1bb900010119858d5959001816763f183f123e113946443e11
+3946443e113946443e113946443e11394660443e11394660442b2b2b2b2b2b2b2b2b2b2b182b
+2b2b2b2b2b2b2b2b2b182b1db0964b5358b0aa1d59b0324b5358b0ff1d594bb08153205c58b9
+020f020d4544b9020e020d45445958b90470020f455258b9020f04704459594bb0e453205c58
+b90020020e4544b90027020e45445958b908420020455258b9002008424459594bb801255320
+5c58b90026020f4544b90021020f45445958b90a0d0026455258b900260a0d4459594bb80401
+53205c58b1d8204544b1202045445958b9250000d8455258b900d825004459594bb804015320
+5c58b9015800264544b1262645445958b923200158455258b9015823204459594bb02953205c
+58b11f1f4544b12d1f45445958b9010d001f455258b9001f010d4459594bb02f53205c58b11f
+1f4544b1251f45445958b90135001f455258b9001f01354459594bb8030153205c58b11f1f45
+44b11f1f45445958b91428001f455258b9001f14284459592b2b2b2b2b2b2b2b2b2b2b2b2b2b
+2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b
+2b2b2b2b2b2b2b2b2b2b2b2b65422b01b331757ec3456523456023456560234560b08b766818
+b080622020b17e754565234520b003266062636820b003266165b075236544b07e234420b131
+c34565234520b003266062636820b003266165b0c3236544b0312344b100c3455458b1c34065
+44b23140314523614459b33f3c5841456523456023456560234560b089766818b080622020b1
+583c4565234520b003266062636820b003266165b03c236544b058234420b13f414565234520
+b003266062636820b003266165b041236544b03f2344b10041455458b141406544b23f403f45
+2361445945695342014b5058b108004259435c58b108004259b3020b0a124358601b21594216
+10703eb0124358b93b21187e1bba040001a8000b2b59b00c2342b00d2342b0124358b92d412d
+411bba04000400000b2b59b00e2342b00f2342b0124358b9187e3b211bba01a80400000b2b59
+b0102342b0112342002b2b2b2b2b2b2b2b00b01243584bb035514bb021535a58b1262645b040
+614459592b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b2b737373737345b04061441800456944
+4569447373737473737374737473742b2b2b2b2b2b2b2b2b2b2b2b0073737373737373737373
+7373737373737373737373737474747474747474747474747474747474747474747575757374
+757575752b7300004bb02a534bb036515a58b1070745b040604459004bb02e534bb036515a58
+b1030345b0406044b1090945b8ffc06044592b4569440174007373732b4569442b012b435c58
+400a0006000702a006a00702b9ffc00274b31a1d326fbd0277007f02770002ffc00277b22f31
+32b9ffc00277b322253240b80274b32f353240b80274b3282a3240b80274b21a2132b8ffc0b3
+371a1d32b8ffc0b3251a1d32b8ffc040112d1a1d329025902d9037a025a02da03706b8ffc0b6
+a61a1d321fa61fb8028eb22fa60300742b732b2b2b2b2b2b2b2b742b737459002b2b435c58b9
+ffc002a1b21c1d32b9ffc002a0b21c1d322b2b592b73012b2b2b2b002b2b2b2b2b2b2b2b2b2b
+2b2b2b2b2b2b2b2b012b2b2b2b2b2b2b73742b2b2b2b2b2b2b2b73732b2b2b2b2b2b732b732b
+2b2b742b2b2b73737373732b73732b2b2b732b2b002b2b2b2b7374732b732b2b2b2b752b2b2b
+2b2b2b2b2b752b2b2b2b2b732b2b2b2b7374752b2b7373732b2b2b732b737374752b2b737475
+2b2b7374752b2b2b2b2b2b2b2b2b2b2b2b74752b000000>
+] def
+/f-1-0 currentdict end definefont pop
+%%EndResource
+%%EndSetup
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox: 0 -1 316 283
+%%EndPageSetup
+q 0 -1 316 284 rectclip q
+q
+0 282.4 316 -283 re W n
+[ 1 0 0 1 0 -0.600006 ] concat
+  q
+1 g
+0 0 316 283 rectfill
+  Q
+Q
+1 g
+238.129 227.349 76.441 -225.961 re f*
+0.498039 g
+238.012 1.388 m 238.012 3.068 l 238.371 3.068 l 238.371 1.388 l h
+238.012 4.388 m 238.012 6.068 l 238.371 6.068 l 238.371 4.388 l h
+238.012 7.267 m 238.012 8.947 l 238.371 8.947 l 238.371 7.267 l h
+238.012 10.267 m 238.012 11.947 l 238.371 11.947 l 238.371 10.267 l h
+238.012 13.267 m 238.012 14.947 l 238.371 14.947 l 238.371 13.267 l h
+238.012 16.267 m 238.012 17.947 l 238.371 17.947 l 238.371 16.267 l h
+238.012 19.146 m 238.012 20.947 l 238.371 20.947 l 238.371 19.146 l h
+238.012 22.146 m 238.012 23.83 l 238.371 23.83 l 238.371 22.146 l h
+238.012 25.146 m 238.012 26.83 l 238.371 26.83 l 238.371 25.146 l h
+238.012 28.146 m 238.012 29.83 l 238.371 29.83 l 238.371 28.146 l h
+238.012 31.146 m 238.012 32.83 l 238.371 32.83 l 238.371 31.146 l h
+238.012 34.029 m 238.012 35.83 l 238.371 35.83 l 238.371 34.029 l h
+238.012 37.029 m 238.012 38.709 l 238.371 38.709 l 238.371 37.029 l h
+238.012 40.029 m 238.012 41.709 l 238.371 41.709 l 238.371 40.029 l h
+238.012 43.029 m 238.012 44.709 l 238.371 44.709 l 238.371 43.029 l h
+238.012 46.029 m 238.012 47.709 l 238.371 47.709 l 238.371 46.029 l h
+238.012 48.908 m 238.012 50.709 l 238.371 50.709 l 238.371 48.908 l h
+238.012 51.908 m 238.012 53.587 l 238.371 53.587 l 238.371 51.908 l h
+238.012 54.908 m 238.012 56.587 l 238.371 56.587 l 238.371 54.908 l h
+238.012 57.908 m 238.012 59.587 l 238.371 59.587 l 238.371 57.908 l h
+238.012 60.908 m 238.012 62.587 l 238.371 62.587 l 238.371 60.908 l h
+238.012 63.787 m 238.012 65.587 l 238.371 65.587 l 238.371 63.787 l h
+238.012 66.787 m 238.012 68.466 l 238.371 68.466 l 238.371 66.787 l h
+238.012 69.787 m 238.012 71.466 l 238.371 71.466 l 238.371 69.787 l h
+238.012 72.787 m 238.012 74.466 l 238.371 74.466 l 238.371 72.787 l h
+238.012 75.787 m 238.012 77.466 l 238.371 77.466 l 238.371 75.787 l h
+238.012 78.67 m 238.012 80.349 l 238.371 80.349 l 238.371 78.67 l h
+238.012 81.67 m 238.012 83.349 l 238.371 83.349 l 238.371 81.67 l h
+238.012 84.67 m 238.012 86.349 l 238.371 86.349 l 238.371 84.67 l h
+238.012 87.67 m 238.012 89.349 l 238.371 89.349 l 238.371 87.67 l h
+238.012 90.548 m 238.012 92.349 l 238.371 92.349 l 238.371 90.548 l h
+238.012 93.548 m 238.012 95.228 l 238.371 95.228 l 238.371 93.548 l h
+238.012 96.548 m 238.012 98.228 l 238.371 98.228 l 238.371 96.548 l h
+238.012 99.548 m 238.012 101.228 l 238.371 101.228 l 238.371 99.548 l h
+238.012 102.548 m 238.012 104.228 l 238.371 104.228 l 238.371 102.548 l
+ h
+238.012 105.427 m 238.012 107.228 l 238.371 107.228 l 238.371 105.427 l
+ h
+238.012 108.427 m 238.012 110.107 l 238.371 110.107 l 238.371 108.427 l
+ h
+238.012 111.427 m 238.012 113.107 l 238.371 113.107 l 238.371 111.427 l
+ h
+238.012 114.427 m 238.012 116.107 l 238.371 116.107 l 238.371 114.427 l
+ h
+238.012 117.427 m 238.012 119.107 l 238.371 119.107 l 238.371 117.427 l
+ h
+238.012 120.306 m 238.012 122.107 l 238.371 122.107 l 238.371 120.306 l
+ h
+238.012 123.306 m 238.012 124.986 l 238.371 124.986 l 238.371 123.306 l
+ h
+238.012 126.306 m 238.012 127.986 l 238.371 127.986 l 238.371 126.306 l
+ h
+238.012 129.306 m 238.012 130.986 l 238.371 130.986 l 238.371 129.306 l
+ h
+238.012 132.306 m 238.012 133.986 l 238.371 133.986 l 238.371 132.306 l
+ h
+238.012 135.189 m 238.012 136.986 l 238.371 136.986 l 238.371 135.189 l
+ h
+238.012 138.189 m 238.012 139.869 l 238.371 139.869 l 238.371 138.189 l
+ h
+238.012 141.189 m 238.012 142.869 l 238.371 142.869 l 238.371 141.189 l
+ h
+238.012 144.189 m 238.012 145.869 l 238.371 145.869 l 238.371 144.189 l
+ h
+238.012 147.189 m 238.012 148.869 l 238.371 148.869 l 238.371 147.189 l
+ h
+238.012 150.068 m 238.012 151.748 l 238.371 151.748 l 238.371 150.068 l
+ h
+238.012 153.068 m 238.012 154.748 l 238.371 154.748 l 238.371 153.068 l
+ h
+238.012 156.068 m 238.012 157.748 l 238.371 157.748 l 238.371 156.068 l
+ h
+238.012 159.068 m 238.012 160.748 l 238.371 160.748 l 238.371 159.068 l
+ h
+238.012 161.947 m 238.012 163.748 l 238.371 163.748 l 238.371 161.947 l
+ h
+238.012 164.947 m 238.012 166.627 l 238.371 166.627 l 238.371 164.947 l
+ h
+238.012 167.947 m 238.012 169.627 l 238.371 169.627 l 238.371 167.947 l
+ h
+238.012 170.947 m 238.012 172.627 l 238.371 172.627 l 238.371 170.947 l
+ h
+238.012 173.947 m 238.012 175.627 l 238.371 175.627 l 238.371 173.947 l
+ h
+238.012 176.83 m 238.012 178.627 l 238.371 178.627 l 238.371 176.83 l h
+238.012 179.83 m 238.012 181.509 l 238.371 181.509 l 238.371 179.83 l h
+238.012 182.83 m 238.012 184.509 l 238.371 184.509 l 238.371 182.83 l h
+238.012 185.83 m 238.012 187.509 l 238.371 187.509 l 238.371 185.83 l h
+238.012 188.83 m 238.012 190.509 l 238.371 190.509 l 238.371 188.83 l h
+238.012 191.709 m 238.012 193.509 l 238.371 193.509 l 238.371 191.709 l
+ h
+238.012 194.709 m 238.012 196.388 l 238.371 196.388 l 238.371 194.709 l
+ h
+238.012 197.709 m 238.012 199.388 l 238.371 199.388 l 238.371 197.709 l
+ h
+238.012 200.709 m 238.012 202.388 l 238.371 202.388 l 238.371 200.709 l
+ h
+238.012 203.709 m 238.012 205.388 l 238.371 205.388 l 238.371 203.709 l
+ h
+238.012 206.587 m 238.012 208.388 l 238.371 208.388 l 238.371 206.587 l
+ h
+238.012 209.587 m 238.012 211.267 l 238.371 211.267 l 238.371 209.587 l
+ h
+238.012 212.587 m 238.012 214.267 l 238.371 214.267 l 238.371 212.587 l
+ h
+238.012 215.587 m 238.012 217.267 l 238.371 217.267 l 238.371 215.587 l
+ h
+238.012 218.587 m 238.012 220.267 l 238.371 220.267 l 238.371 218.587 l
+ h
+238.012 221.466 m 238.012 223.146 l 238.371 223.146 l 238.371 221.466 l
+ h
+238.012 224.466 m 238.012 226.146 l 238.371 226.146 l 238.371 224.466 l
+ h
+238.25 227.587 m 239.93 227.587 l 239.93 227.228 l 238.25 227.228 l h
+241.25 227.587 m 242.93 227.587 l 242.93 227.228 l 241.25 227.228 l h
+244.129 227.587 m 245.93 227.587 l 245.93 227.228 l 244.129 227.228 l h
+247.129 227.587 m 248.809 227.587 l 248.809 227.228 l 247.129 227.228 l
+ h
+250.129 227.587 m 251.809 227.587 l 251.809 227.228 l 250.129 227.228 l
+ h
+253.129 227.587 m 254.809 227.587 l 254.809 227.228 l 253.129 227.228 l
+ h
+256.129 227.587 m 257.809 227.587 l 257.809 227.228 l 256.129 227.228 l
+ h
+259.012 227.587 m 260.809 227.587 l 260.809 227.228 l 259.012 227.228 l
+ h
+262.012 227.587 m 263.691 227.587 l 263.691 227.228 l 262.012 227.228 l
+ h
+265.012 227.587 m 266.691 227.587 l 266.691 227.228 l 265.012 227.228 l
+ h
+268.012 227.587 m 269.691 227.587 l 269.691 227.228 l 268.012 227.228 l
+ h
+271.012 227.587 m 272.691 227.587 l 272.691 227.228 l 271.012 227.228 l
+ h
+273.891 227.587 m 275.691 227.587 l 275.691 227.228 l 273.891 227.228 l
+ h
+276.891 227.587 m 278.57 227.587 l 278.57 227.228 l 276.891 227.228 l h
+279.891 227.587 m 281.57 227.587 l 281.57 227.228 l 279.891 227.228 l h
+282.891 227.587 m 284.57 227.587 l 284.57 227.228 l 282.891 227.228 l h
+285.891 227.587 m 287.57 227.587 l 287.57 227.228 l 285.891 227.228 l h
+288.77 227.587 m 290.449 227.587 l 290.449 227.228 l 288.77 227.228 l h
+291.77 227.587 m 293.449 227.587 l 293.449 227.228 l 291.77 227.228 l h
+294.77 227.587 m 296.449 227.587 l 296.449 227.228 l 294.77 227.228 l h
+297.77 227.587 m 299.449 227.587 l 299.449 227.228 l 297.77 227.228 l h
+300.648 227.587 m 302.449 227.587 l 302.449 227.228 l 300.648 227.228 l
+ h
+303.648 227.587 m 305.328 227.587 l 305.328 227.228 l 303.648 227.228 l
+ h
+306.648 227.587 m 308.328 227.587 l 308.328 227.228 l 306.648 227.228 l
+ h
+309.648 227.587 m 311.328 227.587 l 311.328 227.228 l 309.648 227.228 l
+ h
+312.648 227.587 m 314.328 227.587 l 314.328 227.228 l 312.648 227.228 l
+ h
+314.691 226.388 m 314.691 224.587 l 314.328 224.587 l 314.328 226.388 l
+ h
+314.691 223.388 m 314.691 221.709 l 314.328 221.709 l 314.328 223.388 l
+ h
+314.691 220.388 m 314.691 218.709 l 314.328 218.709 l 314.328 220.388 l
+ h
+314.691 217.388 m 314.691 215.709 l 314.328 215.709 l 314.328 217.388 l
+ h
+314.691 214.388 m 314.691 212.709 l 314.328 212.709 l 314.328 214.388 l
+ h
+314.691 211.509 m 314.691 209.709 l 314.328 209.709 l 314.328 211.509 l
+ h
+314.691 208.509 m 314.691 206.83 l 314.328 206.83 l 314.328 208.509 l h
+314.691 205.509 m 314.691 203.83 l 314.328 203.83 l 314.328 205.509 l h
+314.691 202.509 m 314.691 200.83 l 314.328 200.83 l 314.328 202.509 l h
+314.691 199.509 m 314.691 197.83 l 314.328 197.83 l 314.328 199.509 l h
+314.691 196.627 m 314.691 194.947 l 314.328 194.947 l 314.328 196.627 l
+ h
+314.691 193.627 m 314.691 191.947 l 314.328 191.947 l 314.328 193.627 l
+ h
+314.691 190.627 m 314.691 188.947 l 314.328 188.947 l 314.328 190.627 l
+ h
+314.691 187.627 m 314.691 185.947 l 314.328 185.947 l 314.328 187.627 l
+ h
+314.691 184.748 m 314.691 182.947 l 314.328 182.947 l 314.328 184.748 l
+ h
+314.691 181.748 m 314.691 180.068 l 314.328 180.068 l 314.328 181.748 l
+ h
+314.691 178.748 m 314.691 177.068 l 314.328 177.068 l 314.328 178.748 l
+ h
+314.691 175.748 m 314.691 174.068 l 314.328 174.068 l 314.328 175.748 l
+ h
+314.691 172.748 m 314.691 171.068 l 314.328 171.068 l 314.328 172.748 l
+ h
+314.691 169.869 m 314.691 168.068 l 314.328 168.068 l 314.328 169.869 l
+ h
+314.691 166.869 m 314.691 165.189 l 314.328 165.189 l 314.328 166.869 l
+ h
+314.691 163.869 m 314.691 162.189 l 314.328 162.189 l 314.328 163.869 l
+ h
+314.691 160.869 m 314.691 159.189 l 314.328 159.189 l 314.328 160.869 l
+ h
+314.691 157.869 m 314.691 156.189 l 314.328 156.189 l 314.328 157.869 l
+ h
+314.691 154.986 m 314.691 153.189 l 314.328 153.189 l 314.328 154.986 l
+ h
+314.691 151.986 m 314.691 150.306 l 314.328 150.306 l 314.328 151.986 l
+ h
+314.691 148.986 m 314.691 147.306 l 314.328 147.306 l 314.328 148.986 l
+ h
+314.691 145.986 m 314.691 144.306 l 314.328 144.306 l 314.328 145.986 l
+ h
+314.691 142.986 m 314.691 141.306 l 314.328 141.306 l 314.328 142.986 l
+ h
+314.691 140.107 m 314.691 138.306 l 314.328 138.306 l 314.328 140.107 l
+ h
+314.691 137.107 m 314.691 135.427 l 314.328 135.427 l 314.328 137.107 l
+ h
+314.691 134.107 m 314.691 132.427 l 314.328 132.427 l 314.328 134.107 l
+ h
+314.691 131.107 m 314.691 129.427 l 314.328 129.427 l 314.328 131.107 l
+ h
+314.691 128.107 m 314.691 126.427 l 314.328 126.427 l 314.328 128.107 l
+ h
+314.691 125.228 m 314.691 123.548 l 314.328 123.548 l 314.328 125.228 l
+ h
+314.691 122.228 m 314.691 120.548 l 314.328 120.548 l 314.328 122.228 l
+ h
+314.691 119.228 m 314.691 117.548 l 314.328 117.548 l 314.328 119.228 l
+ h
+314.691 116.228 m 314.691 114.548 l 314.328 114.548 l 314.328 116.228 l
+ h
+314.691 113.349 m 314.691 111.548 l 314.328 111.548 l 314.328 113.349 l
+ h
+314.691 110.349 m 314.691 108.67 l 314.328 108.67 l 314.328 110.349 l h
+314.691 107.349 m 314.691 105.67 l 314.328 105.67 l 314.328 107.349 l h
+314.691 104.349 m 314.691 102.67 l 314.328 102.67 l 314.328 104.349 l h
+314.691 101.349 m 314.691 99.67 l 314.328 99.67 l 314.328 101.349 l h
+314.691 98.466 m 314.691 96.67 l 314.328 96.67 l 314.328 98.466 l h
+314.691 95.466 m 314.691 93.787 l 314.328 93.787 l 314.328 95.466 l h
+314.691 92.466 m 314.691 90.787 l 314.328 90.787 l 314.328 92.466 l h
+314.691 89.466 m 314.691 87.787 l 314.328 87.787 l 314.328 89.466 l h
+314.691 86.466 m 314.691 84.787 l 314.328 84.787 l 314.328 86.466 l h
+314.691 83.587 m 314.691 81.787 l 314.328 81.787 l 314.328 83.587 l h
+314.691 80.587 m 314.691 78.908 l 314.328 78.908 l 314.328 80.587 l h
+314.691 77.587 m 314.691 75.908 l 314.328 75.908 l 314.328 77.587 l h
+314.691 74.587 m 314.691 72.908 l 314.328 72.908 l 314.328 74.587 l h
+314.691 71.587 m 314.691 69.908 l 314.328 69.908 l 314.328 71.587 l h
+314.691 68.709 m 314.691 66.908 l 314.328 66.908 l 314.328 68.709 l h
+314.691 65.709 m 314.691 64.029 l 314.328 64.029 l 314.328 65.709 l h
+314.691 62.709 m 314.691 61.029 l 314.328 61.029 l 314.328 62.709 l h
+314.691 59.709 m 314.691 58.029 l 314.328 58.029 l 314.328 59.709 l h
+314.691 56.709 m 314.691 55.029 l 314.328 55.029 l 314.328 56.709 l h
+314.691 53.83 m 314.691 52.146 l 314.328 52.146 l 314.328 53.83 l h
+314.691 50.83 m 314.691 49.146 l 314.328 49.146 l 314.328 50.83 l h
+314.691 47.83 m 314.691 46.146 l 314.328 46.146 l 314.328 47.83 l h
+314.691 44.83 m 314.691 43.146 l 314.328 43.146 l 314.328 44.83 l h
+314.691 41.947 m 314.691 40.146 l 314.328 40.146 l 314.328 41.947 l h
+314.691 38.947 m 314.691 37.267 l 314.328 37.267 l 314.328 38.947 l h
+314.691 35.947 m 314.691 34.267 l 314.328 34.267 l 314.328 35.947 l h
+314.691 32.947 m 314.691 31.267 l 314.328 31.267 l 314.328 32.947 l h
+314.691 29.947 m 314.691 28.267 l 314.328 28.267 l 314.328 29.947 l h
+314.691 27.068 m 314.691 25.267 l 314.328 25.267 l 314.328 27.068 l h
+314.691 24.068 m 314.691 22.388 l 314.328 22.388 l 314.328 24.068 l h
+314.691 21.068 m 314.691 19.388 l 314.328 19.388 l 314.328 21.068 l h
+314.691 18.068 m 314.691 16.388 l 314.328 16.388 l 314.328 18.068 l h
+314.691 15.068 m 314.691 13.388 l 314.328 13.388 l 314.328 15.068 l h
+314.691 12.189 m 314.691 10.388 l 314.328 10.388 l 314.328 12.189 l h
+314.691 9.189 m 314.691 7.509 l 314.328 7.509 l 314.328 9.189 l h
+314.691 6.189 m 314.691 4.509 l 314.328 4.509 l 314.328 6.189 l h
+314.691 3.189 m 314.691 1.509 l 314.328 1.509 l 314.328 3.189 l h
+313.371 1.146 m 311.691 1.146 l 311.691 1.627 l 313.371 1.627 l h
+310.371 1.146 m 308.691 1.146 l 308.691 1.627 l 310.371 1.627 l h
+307.488 1.146 m 305.691 1.146 l 305.691 1.627 l 307.488 1.627 l h
+304.488 1.146 m 302.809 1.146 l 302.809 1.627 l 304.488 1.627 l h
+301.488 1.146 m 299.809 1.146 l 299.809 1.627 l 301.488 1.627 l h
+298.488 1.146 m 296.809 1.146 l 296.809 1.627 l 298.488 1.627 l h
+295.488 1.146 m 293.809 1.146 l 293.809 1.627 l 295.488 1.627 l h
+292.609 1.146 m 290.93 1.146 l 290.93 1.627 l 292.609 1.627 l h
+289.609 1.146 m 287.93 1.146 l 287.93 1.627 l 289.609 1.627 l h
+286.609 1.146 m 284.93 1.146 l 284.93 1.627 l 286.609 1.627 l h
+283.609 1.146 m 281.93 1.146 l 281.93 1.627 l 283.609 1.627 l h
+280.73 1.146 m 278.93 1.146 l 278.93 1.627 l 280.73 1.627 l h
+277.73 1.146 m 276.051 1.146 l 276.051 1.627 l 277.73 1.627 l h
+274.73 1.146 m 273.051 1.146 l 273.051 1.627 l 274.73 1.627 l h
+271.73 1.146 m 270.051 1.146 l 270.051 1.627 l 271.73 1.627 l h
+268.73 1.146 m 267.051 1.146 l 267.051 1.627 l 268.73 1.627 l h
+265.852 1.146 m 264.051 1.146 l 264.051 1.627 l 265.852 1.627 l h
+262.852 1.146 m 261.168 1.146 l 261.168 1.627 l 262.852 1.627 l h
+259.852 1.146 m 258.168 1.146 l 258.168 1.627 l 259.852 1.627 l h
+256.852 1.146 m 255.168 1.146 l 255.168 1.627 l 256.852 1.627 l h
+253.852 1.146 m 252.168 1.146 l 252.168 1.627 l 253.852 1.627 l h
+250.969 1.146 m 249.168 1.146 l 249.168 1.627 l 250.969 1.627 l h
+247.969 1.146 m 246.289 1.146 l 246.289 1.627 l 247.969 1.627 l h
+244.969 1.146 m 243.289 1.146 l 243.289 1.627 l 244.969 1.627 l h
+241.969 1.146 m 240.289 1.146 l 240.289 1.627 l 241.969 1.627 l h
+238.969 1.146 m 238.129 1.146 l 238.129 1.627 l 238.969 1.627 l f
+1 g
+1.969 235.627 72.359 -174 re f*
+0.498039 g
+1.852 61.627 m 1.852 63.306 l 2.211 63.306 l 2.211 61.627 l h
+1.852 64.627 m 1.852 66.306 l 2.211 66.306 l 2.211 64.627 l h
+1.852 67.509 m 1.852 69.306 l 2.211 69.306 l 2.211 67.509 l h
+1.852 70.509 m 1.852 72.189 l 2.211 72.189 l 2.211 70.509 l h
+1.852 73.509 m 1.852 75.189 l 2.211 75.189 l 2.211 73.509 l h
+1.852 76.509 m 1.852 78.189 l 2.211 78.189 l 2.211 76.509 l h
+1.852 79.509 m 1.852 81.189 l 2.211 81.189 l 2.211 79.509 l h
+1.852 82.388 m 1.852 84.068 l 2.211 84.068 l 2.211 82.388 l h
+1.852 85.388 m 1.852 87.068 l 2.211 87.068 l 2.211 85.388 l h
+1.852 88.388 m 1.852 90.068 l 2.211 90.068 l 2.211 88.388 l h
+1.852 91.388 m 1.852 93.068 l 2.211 93.068 l 2.211 91.388 l h
+1.852 94.267 m 1.852 96.068 l 2.211 96.068 l 2.211 94.267 l h
+1.852 97.267 m 1.852 98.947 l 2.211 98.947 l 2.211 97.267 l h
+1.852 100.267 m 1.852 101.947 l 2.211 101.947 l 2.211 100.267 l h
+1.852 103.267 m 1.852 104.947 l 2.211 104.947 l 2.211 103.267 l h
+1.852 106.267 m 1.852 107.947 l 2.211 107.947 l 2.211 106.267 l h
+1.852 109.146 m 1.852 110.947 l 2.211 110.947 l 2.211 109.146 l h
+1.852 112.146 m 1.852 113.83 l 2.211 113.83 l 2.211 112.146 l h
+1.852 115.146 m 1.852 116.83 l 2.211 116.83 l 2.211 115.146 l h
+1.852 118.146 m 1.852 119.83 l 2.211 119.83 l 2.211 118.146 l h
+1.852 121.146 m 1.852 122.83 l 2.211 122.83 l 2.211 121.146 l h
+1.852 124.029 m 1.852 125.83 l 2.211 125.83 l 2.211 124.029 l h
+1.852 127.029 m 1.852 128.709 l 2.211 128.709 l 2.211 127.029 l h
+1.852 130.029 m 1.852 131.709 l 2.211 131.709 l 2.211 130.029 l h
+1.852 133.029 m 1.852 134.709 l 2.211 134.709 l 2.211 133.029 l h
+1.852 136.029 m 1.852 137.709 l 2.211 137.709 l 2.211 136.029 l h
+1.852 138.908 m 1.852 140.709 l 2.211 140.709 l 2.211 138.908 l h
+1.852 141.908 m 1.852 143.587 l 2.211 143.587 l 2.211 141.908 l h
+1.852 144.908 m 1.852 146.587 l 2.211 146.587 l 2.211 144.908 l h
+1.852 147.908 m 1.852 149.587 l 2.211 149.587 l 2.211 147.908 l h
+1.852 150.908 m 1.852 152.587 l 2.211 152.587 l 2.211 150.908 l h
+1.852 153.787 m 1.852 155.466 l 2.211 155.466 l 2.211 153.787 l h
+1.852 156.787 m 1.852 158.466 l 2.211 158.466 l 2.211 156.787 l h
+1.852 159.787 m 1.852 161.466 l 2.211 161.466 l 2.211 159.787 l h
+1.852 162.787 m 1.852 164.466 l 2.211 164.466 l 2.211 162.787 l h
+1.852 165.67 m 1.852 167.466 l 2.211 167.466 l 2.211 165.67 l h
+1.852 168.67 m 1.852 170.349 l 2.211 170.349 l 2.211 168.67 l h
+1.852 171.67 m 1.852 173.349 l 2.211 173.349 l 2.211 171.67 l h
+1.852 174.67 m 1.852 176.349 l 2.211 176.349 l 2.211 174.67 l h
+1.852 177.67 m 1.852 179.349 l 2.211 179.349 l 2.211 177.67 l h
+1.852 180.548 m 1.852 182.349 l 2.211 182.349 l 2.211 180.548 l h
+1.852 183.548 m 1.852 185.228 l 2.211 185.228 l 2.211 183.548 l h
+1.852 186.548 m 1.852 188.228 l 2.211 188.228 l 2.211 186.548 l h
+1.852 189.548 m 1.852 191.228 l 2.211 191.228 l 2.211 189.548 l h
+1.852 192.548 m 1.852 194.228 l 2.211 194.228 l 2.211 192.548 l h
+1.852 195.427 m 1.852 197.228 l 2.211 197.228 l 2.211 195.427 l h
+1.852 198.427 m 1.852 200.107 l 2.211 200.107 l 2.211 198.427 l h
+1.852 201.427 m 1.852 203.107 l 2.211 203.107 l 2.211 201.427 l h
+1.852 204.427 m 1.852 206.107 l 2.211 206.107 l 2.211 204.427 l h
+1.852 207.427 m 1.852 209.107 l 2.211 209.107 l 2.211 207.427 l h
+1.852 210.306 m 1.852 212.107 l 2.211 212.107 l 2.211 210.306 l h
+1.852 213.306 m 1.852 214.986 l 2.211 214.986 l 2.211 213.306 l h
+1.852 216.306 m 1.852 217.986 l 2.211 217.986 l 2.211 216.306 l h
+1.852 219.306 m 1.852 220.986 l 2.211 220.986 l 2.211 219.306 l h
+1.852 222.306 m 1.852 223.986 l 2.211 223.986 l 2.211 222.306 l h
+1.852 225.189 m 1.852 226.869 l 2.211 226.869 l 2.211 225.189 l h
+1.852 228.189 m 1.852 229.869 l 2.211 229.869 l 2.211 228.189 l h
+1.852 231.189 m 1.852 232.869 l 2.211 232.869 l 2.211 231.189 l h
+1.852 234.189 m 1.852 235.627 l 1.852 235.748 1.852 235.869 1.969 235.869
+ c 2.211 235.869 l 2.211 235.509 l 1.969 235.509 l 2.211 235.627 l 2.211
+ 234.189 l h
+3.531 235.869 m 5.211 235.869 l 5.211 235.509 l 3.531 235.509 l h
+6.41 235.869 m 8.211 235.869 l 8.211 235.509 l 6.41 235.509 l h
+9.41 235.869 m 11.09 235.869 l 11.09 235.509 l 9.41 235.509 l h
+12.41 235.869 m 14.09 235.869 l 14.09 235.509 l 12.41 235.509 l h
+15.41 235.869 m 17.09 235.869 l 17.09 235.509 l 15.41 235.509 l h
+18.41 235.869 m 20.09 235.869 l 20.09 235.509 l 18.41 235.509 l h
+21.289 235.869 m 23.09 235.869 l 23.09 235.509 l 21.289 235.509 l h
+24.289 235.869 m 25.969 235.869 l 25.969 235.509 l 24.289 235.509 l h
+27.289 235.869 m 28.969 235.869 l 28.969 235.509 l 27.289 235.509 l h
+30.289 235.869 m 31.969 235.869 l 31.969 235.509 l 30.289 235.509 l h
+33.289 235.869 m 34.969 235.869 l 34.969 235.509 l 33.289 235.509 l h
+36.168 235.869 m 37.852 235.869 l 37.852 235.509 l 36.168 235.509 l h
+39.168 235.869 m 40.852 235.869 l 40.852 235.509 l 39.168 235.509 l h
+42.168 235.869 m 43.852 235.869 l 43.852 235.509 l 42.168 235.509 l h
+45.168 235.869 m 46.852 235.869 l 46.852 235.509 l 45.168 235.509 l h
+48.051 235.869 m 49.852 235.869 l 49.852 235.509 l 48.051 235.509 l h
+51.051 235.869 m 52.73 235.869 l 52.73 235.509 l 51.051 235.509 l h
+54.051 235.869 m 55.73 235.869 l 55.73 235.509 l 54.051 235.509 l h
+57.051 235.869 m 58.73 235.869 l 58.73 235.509 l 57.051 235.509 l h
+60.051 235.869 m 61.73 235.869 l 61.73 235.509 l 60.051 235.509 l h
+62.93 235.869 m 64.73 235.869 l 64.73 235.509 l 62.93 235.509 l h
+65.93 235.869 m 67.609 235.869 l 67.609 235.509 l 65.93 235.509 l h
+68.93 235.869 m 70.609 235.869 l 70.609 235.509 l 68.93 235.509 l h
+71.93 235.869 m 73.609 235.869 l 73.609 235.509 l 71.93 235.509 l h
+74.57 235.146 m 74.57 233.466 l 74.09 233.466 l 74.09 235.146 l h
+74.57 232.146 m 74.57 230.466 l 74.09 230.466 l 74.09 232.146 l h
+74.57 229.146 m 74.57 227.466 l 74.09 227.466 l 74.09 229.146 l h
+74.57 226.146 m 74.57 224.466 l 74.09 224.466 l 74.09 226.146 l h
+74.57 223.267 m 74.57 221.466 l 74.09 221.466 l 74.09 223.267 l h
+74.57 220.267 m 74.57 218.587 l 74.09 218.587 l 74.09 220.267 l h
+74.57 217.267 m 74.57 215.587 l 74.09 215.587 l 74.09 217.267 l h
+74.57 214.267 m 74.57 212.587 l 74.09 212.587 l 74.09 214.267 l h
+74.57 211.267 m 74.57 209.587 l 74.09 209.587 l 74.09 211.267 l h
+74.57 208.388 m 74.57 206.587 l 74.09 206.587 l 74.09 208.388 l h
+74.57 205.388 m 74.57 203.709 l 74.09 203.709 l 74.09 205.388 l h
+74.57 202.388 m 74.57 200.709 l 74.09 200.709 l 74.09 202.388 l h
+74.57 199.388 m 74.57 197.709 l 74.09 197.709 l 74.09 199.388 l h
+74.57 196.388 m 74.57 194.709 l 74.09 194.709 l 74.09 196.388 l h
+74.57 193.509 m 74.57 191.709 l 74.09 191.709 l 74.09 193.509 l h
+74.57 190.509 m 74.57 188.83 l 74.09 188.83 l 74.09 190.509 l h
+74.57 187.509 m 74.57 185.83 l 74.09 185.83 l 74.09 187.509 l h
+74.57 184.509 m 74.57 182.83 l 74.09 182.83 l 74.09 184.509 l h
+74.57 181.509 m 74.57 179.83 l 74.09 179.83 l 74.09 181.509 l h
+74.57 178.627 m 74.57 176.83 l 74.09 176.83 l 74.09 178.627 l h
+74.57 175.627 m 74.57 173.947 l 74.09 173.947 l 74.09 175.627 l h
+74.57 172.627 m 74.57 170.947 l 74.09 170.947 l 74.09 172.627 l h
+74.57 169.627 m 74.57 167.947 l 74.09 167.947 l 74.09 169.627 l h
+74.57 166.627 m 74.57 164.947 l 74.09 164.947 l 74.09 166.627 l h
+74.57 163.748 m 74.57 162.068 l 74.09 162.068 l 74.09 163.748 l h
+74.57 160.748 m 74.57 159.068 l 74.09 159.068 l 74.09 160.748 l h
+74.57 157.748 m 74.57 156.068 l 74.09 156.068 l 74.09 157.748 l h
+74.57 154.748 m 74.57 153.068 l 74.09 153.068 l 74.09 154.748 l h
+74.57 151.869 m 74.57 150.068 l 74.09 150.068 l 74.09 151.869 l h
+74.57 148.869 m 74.57 147.189 l 74.09 147.189 l 74.09 148.869 l h
+74.57 145.869 m 74.57 144.189 l 74.09 144.189 l 74.09 145.869 l h
+74.57 142.869 m 74.57 141.189 l 74.09 141.189 l 74.09 142.869 l h
+74.57 139.869 m 74.57 138.189 l 74.09 138.189 l 74.09 139.869 l h
+74.57 136.986 m 74.57 135.189 l 74.09 135.189 l 74.09 136.986 l h
+74.57 133.986 m 74.57 132.306 l 74.09 132.306 l 74.09 133.986 l h
+74.57 130.986 m 74.57 129.306 l 74.09 129.306 l 74.09 130.986 l h
+74.57 127.986 m 74.57 126.306 l 74.09 126.306 l 74.09 127.986 l h
+74.57 124.986 m 74.57 123.306 l 74.09 123.306 l 74.09 124.986 l h
+74.57 122.107 m 74.57 120.306 l 74.09 120.306 l 74.09 122.107 l h
+74.57 119.107 m 74.57 117.427 l 74.09 117.427 l 74.09 119.107 l h
+74.57 116.107 m 74.57 114.427 l 74.09 114.427 l 74.09 116.107 l h
+74.57 113.107 m 74.57 111.427 l 74.09 111.427 l 74.09 113.107 l h
+74.57 110.107 m 74.57 108.427 l 74.09 108.427 l 74.09 110.107 l h
+74.57 107.228 m 74.57 105.427 l 74.09 105.427 l 74.09 107.228 l h
+74.57 104.228 m 74.57 102.548 l 74.09 102.548 l 74.09 104.228 l h
+74.57 101.228 m 74.57 99.548 l 74.09 99.548 l 74.09 101.228 l h
+74.57 98.228 m 74.57 96.548 l 74.09 96.548 l 74.09 98.228 l h
+74.57 95.228 m 74.57 93.548 l 74.09 93.548 l 74.09 95.228 l h
+74.57 92.349 m 74.57 90.67 l 74.09 90.67 l 74.09 92.349 l h
+74.57 89.349 m 74.57 87.67 l 74.09 87.67 l 74.09 89.349 l h
+74.57 86.349 m 74.57 84.67 l 74.09 84.67 l 74.09 86.349 l h
+74.57 83.349 m 74.57 81.67 l 74.09 81.67 l 74.09 83.349 l h
+74.57 80.466 m 74.57 78.67 l 74.09 78.67 l 74.09 80.466 l h
+74.57 77.466 m 74.57 75.787 l 74.09 75.787 l 74.09 77.466 l h
+74.57 74.466 m 74.57 72.787 l 74.09 72.787 l 74.09 74.466 l h
+74.57 71.466 m 74.57 69.787 l 74.09 69.787 l 74.09 71.466 l h
+74.57 68.466 m 74.57 66.787 l 74.09 66.787 l 74.09 68.466 l h
+74.57 65.587 m 74.57 63.787 l 74.09 63.787 l 74.09 65.587 l h
+74.57 62.587 m 74.57 61.627 l 74.57 61.509 74.449 61.388 74.328 61.388 
+c 73.609 61.388 l 73.609 61.869 l 74.328 61.869 l 74.09 61.627 l 74.09 62.587
+ l h
+72.289 61.388 m 70.609 61.388 l 70.609 61.869 l 72.289 61.869 l h
+69.289 61.388 m 67.609 61.388 l 67.609 61.869 l 69.289 61.869 l h
+66.41 61.388 m 64.609 61.388 l 64.609 61.869 l 66.41 61.869 l h
+63.41 61.388 m 61.73 61.388 l 61.73 61.869 l 63.41 61.869 l h
+60.41 61.388 m 58.73 61.388 l 58.73 61.869 l 60.41 61.869 l h
+57.41 61.388 m 55.73 61.388 l 55.73 61.869 l 57.41 61.869 l h
+54.41 61.388 m 52.73 61.388 l 52.73 61.869 l 54.41 61.869 l h
+51.531 61.388 m 49.73 61.388 l 49.73 61.869 l 51.531 61.869 l h
+48.531 61.388 m 46.852 61.388 l 46.852 61.869 l 48.531 61.869 l h
+45.531 61.388 m 43.852 61.388 l 43.852 61.869 l 45.531 61.869 l h
+42.531 61.388 m 40.852 61.388 l 40.852 61.869 l 42.531 61.869 l h
+39.531 61.388 m 37.852 61.388 l 37.852 61.869 l 39.531 61.869 l h
+36.648 61.388 m 34.852 61.388 l 34.852 61.869 l 36.648 61.869 l h
+33.648 61.388 m 31.969 61.388 l 31.969 61.869 l 33.648 61.869 l h
+30.648 61.388 m 28.969 61.388 l 28.969 61.869 l 30.648 61.869 l h
+27.648 61.388 m 25.969 61.388 l 25.969 61.869 l 27.648 61.869 l h
+24.648 61.388 m 22.969 61.388 l 22.969 61.869 l 24.648 61.869 l h
+21.77 61.388 m 19.969 61.388 l 19.969 61.869 l 21.77 61.869 l h
+18.77 61.388 m 17.09 61.388 l 17.09 61.869 l 18.77 61.869 l h
+15.77 61.388 m 14.09 61.388 l 14.09 61.869 l 15.77 61.869 l h
+12.77 61.388 m 11.09 61.388 l 11.09 61.869 l 12.77 61.869 l h
+9.77 61.388 m 8.09 61.388 l 8.09 61.869 l 9.77 61.869 l h
+6.891 61.388 m 5.211 61.388 l 5.211 61.869 l 6.891 61.869 l h
+3.891 61.388 m 2.211 61.388 l 2.211 61.869 l 3.891 61.869 l f
+0.0509804 g
+162.289 162.306 m 162.289 154.986 l 161.809 154.986 l 161.809 162.306 l
+ h
+163.25 155.466 m 162.051 151.146 l 160.73 155.466 l f
+162.168 186.908 m 162.051 178.627 l 162.531 178.627 l 162.648 186.908 l
+ h
+161.09 179.107 m 162.289 174.908 l 163.609 179.107 l f
+0.498039 g
+169.852 207.787 m 169.852 200.466 l 169.371 200.466 l 169.371 207.787 l
+ h
+170.93 200.83 m 169.609 196.627 l 168.41 200.83 l f
+157.852 211.986 m 157.852 200.466 l 157.371 200.466 l 157.371 211.986 l
+ h
+158.809 200.83 m 157.609 196.627 l 156.289 200.83 l f
+0.0509804 g
+152.93 215.83 m 152.93 200.466 l 152.57 200.466 l 152.57 215.83 l h
+154.012 200.83 m 152.809 196.627 l 151.488 200.83 l f
+1 g
+143.449 229.627 51.48 -15.957 re f*
+0.65098 g
+143.211 229.627 m 143.211 229.748 143.328 229.869 143.449 229.869 c 194.93
+ 229.869 l 195.051 229.869 195.168 229.748 195.168 229.627 c 195.168 213.67
+ l 195.168 213.548 195.051 213.548 194.93 213.548 c 143.449 213.548 l 143.328
+ 213.548 143.211 213.548 143.211 213.67 c h
+143.57 213.67 m 143.449 213.908 l 194.93 213.908 l 194.691 213.67 l 194.691
+ 229.627 l 194.93 229.509 l 143.449 229.509 l 143.57 229.627 l f
+0.0509804 g
+162.531 261.189 m 162.531 253.986 l 161.93 253.986 l 161.93 261.189 l h
+163.488 254.349 m 162.289 250.146 l 160.969 254.349 l f
+1 g
+131.328 226.267 51.602 -17.52 re f*
+0.65098 g
+131.09 226.267 m 131.09 226.388 131.211 226.509 131.328 226.509 c 182.93
+ 226.509 l 183.051 226.509 183.051 226.388 183.051 226.267 c 183.051 208.748
+ l 183.051 208.627 183.051 208.509 182.93 208.509 c 131.328 208.509 l 131.211
+ 208.509 131.09 208.627 131.09 208.748 c h
+131.57 208.748 m 131.328 208.986 l 182.93 208.986 l 182.691 208.748 l 182.691
+ 226.267 l 182.93 226.029 l 131.328 226.029 l 131.57 226.267 l f
+1 g
+126.77 223.146 51.602 -17.52 re f*
+0.0509804 g
+126.531 223.146 m 126.531 223.267 126.648 223.388 126.77 223.388 c 178.371
+ 223.388 l 178.488 223.388 178.609 223.267 178.609 223.146 c 178.609 205.627
+ l 178.609 205.509 178.488 205.388 178.371 205.388 c 126.77 205.388 l 126.648
+ 205.388 126.531 205.509 126.531 205.627 c h
+127.012 205.627 m 126.77 205.869 l 178.371 205.869 l 178.129 205.627 l 
+178.129 223.146 l 178.371 222.908 l 126.77 222.908 l 127.012 223.146 l f
+BT
+4.199998 0 0 4.199998 134.3297 215.46791 Tm
+/f-0-0 1 Tf
+[(C)8(o)42(n)13(s)-14(t)-8(r)-10(u)13(c)-14(t)-8( )-65(b)13(r)-9(e)41(a)
+42(k)-14(e)42(n)13(d)]TJ
+-0.914286 -1.228571 Td
+[(g)13(r)-10(a)42(p)14(h)13( )-94(f)-8(o)42(r)-10( )-36(e)42(a)42(c)-15
+(h)14( )-65(s)-15(e)42(g)14(m)4(e)42(n)13(t)]TJ
+ET
+1 g
+129.77 261.787 m 134.211 279.67 l 194.809 279.67 l 190.371 261.787 l f*
+0.0509804 g
+129.77 261.306 m 129.648 261.306 129.531 261.427 129.41 261.548 c 129.289
+ 261.67 129.289 261.787 129.289 261.908 c 133.852 279.787 l 133.852 280.029
+ 134.09 280.146 134.211 280.146 c 194.809 280.146 l 195.051 280.146 195.168
+ 280.146 195.289 280.029 c 195.289 279.908 195.41 279.67 195.289 279.548
+ c 190.852 261.67 l 190.852 261.548 190.609 261.306 190.371 261.306 c h
+190.371 262.267 m 189.891 261.908 l 194.449 279.787 l 194.809 279.189 l
+ 134.211 279.189 l 134.691 279.548 l 130.25 261.67 l 129.77 262.267 l f
+0 g
+BT
+5.159998 0 0 5.159998 140.329698 271.987887 Tm
+/f-0-0 1 Tf
+[(S)-7(am)-7(pl)-12(e)21( )115(A)-7(l)-11(i)-10(gn)-4(m)19(en)19(ts)11( )
+]TJ
+2.581395 -1.186047 Td
+[(\()7(B)-7(A)-7(M)19(s)-12(\))]TJ
+ET
+0.498039 g
+BT
+5.159998 0 0 5.159998 184.24968 221.107907 Tm
+/f-1-0 1 Tf
+(\205)Tj
+ET
+0.0509804 g
+152.93 242.349 m 152.93 226.986 l 152.57 226.986 l 152.57 242.349 l h
+154.012 227.466 m 152.809 223.146 l 151.488 227.466 l f
+0.498039 g
+157.852 241.869 m 157.852 230.349 l 157.371 230.349 l 157.371 241.869 l
+ h
+158.809 230.83 m 157.609 226.509 l 156.289 230.83 l f
+169.852 240.67 m 169.852 233.349 l 169.371 233.349 l 169.371 240.67 l h
+170.93 233.709 m 169.609 229.509 l 168.41 233.709 l f
+1 g
+139.25 250.146 46.078 -12.598 re f*
+0.0509804 g
+139.012 250.146 m 139.012 250.267 139.129 250.388 139.25 250.388 c 185.328
+ 250.388 l 185.449 250.388 185.57 250.267 185.57 250.146 c 185.57 237.548
+ l 185.57 237.427 185.449 237.306 185.328 237.306 c 139.25 237.306 l 139.129
+ 237.306 139.012 237.427 139.012 237.548 c h
+139.488 237.548 m 139.25 237.67 l 185.328 237.67 l 185.09 237.548 l 185.09
+ 250.146 l 185.328 249.908 l 139.25 249.908 l 139.488 250.146 l f
+BT
+4.199998 0 0 4.199998 149.569694 244.867898 Tm
+/f-0-0 1 Tf
+[(P)10(a)42(r)-10(a)42(l)22(l)-7(e)-15(l)-6(i)22(z)-14(e)13( )-151(o)42
+(n)13( )]TJ
+-1.085714 -1.228571 Td
+[(g)13(e)42(n)13(o)42(m)-24(e)42( )-151(s)-14(e)42(g)13(m)5(e)41(n)14(t)
+-8(s)]TJ
+ET
+1 g
+139.25 196.267 46.078 -12.598 re f*
+0.0509804 g
+139.012 196.267 m 139.012 196.388 139.129 196.509 139.25 196.509 c 185.328
+ 196.509 l 185.449 196.509 185.57 196.388 185.57 196.267 c 185.57 183.67
+ l 185.57 183.548 185.449 183.427 185.328 183.427 c 139.25 183.427 l 139.129
+ 183.427 139.012 183.548 139.012 183.67 c h
+139.488 183.67 m 139.25 183.787 l 185.328 183.787 l 185.09 183.67 l 185.09
+ 196.267 l 185.328 196.029 l 139.25 196.029 l 139.488 196.267 l f
+BT
+4.199998 0 0 4.199998 147.769695 190.987919 Tm
+/f-0-0 1 Tf
+[(M)4(e)42(r)-10(g)14(e)42( )-123(s)-14(e)42(g)13(m)5(e)42(n)13(t)-8( )
+]TJ
+-0.285714 -1.228571 Td
+[(b)13(r)-10(e)42(a)42(k)-14(e)42(n)-16(d)-386(g)13(r)-10(a)42(p)13(h)14
+(s)]TJ
+ET
+1 g
+135.531 160.748 m 139.012 174.908 l 189.051 174.908 l 185.449 160.748 l
+ f*
+0.0509804 g
+135.531 160.509 m 135.41 160.509 135.289 160.509 135.289 160.627 c 135.168
+ 160.627 135.168 160.748 135.168 160.869 c 138.648 174.908 l 138.77 175.029
+ 138.891 175.146 139.012 175.146 c 189.051 175.146 l 189.168 175.146 189.168
+ 175.146 189.289 175.029 c 189.289 175.029 189.41 174.908 189.289 174.787
+ c 185.809 160.748 l 185.809 160.509 185.691 160.509 185.449 160.509 c h
+185.449 161.107 m 185.211 160.869 l 188.691 174.908 l 189.051 174.548 l
+ 139.012 174.548 l 139.371 174.787 l 135.77 160.748 l 135.531 161.107 l f
+0 g
+BT
+4.679998 0 0 4.679998 153.049692 168.907928 Tm
+/f-0-0 1 Tf
+[(G)9(e)-8(n)-8(o)-8(m)-13(e)-8( )]TJ
+-1.74359 -1.205128 Td
+[(Bre)-8(a)-8(k)-13(e)-8(n)-8(d)-238(G)8(ra)-8(p)-8(h)]TJ
+ET
+0.34902 g
+193.969 254.709 m 197.09 254.709 l 197.93 254.587 l 198.531 254.587 l 198.891
+ 254.466 l 199.371 254.466 l 199.488 254.349 l 199.609 254.349 l 199.609
+ 254.228 l 199.73 254.228 199.73 254.228 199.852 254.107 c 199.852 179.83
+ l 199.73 179.83 199.73 179.709 199.609 179.709 c 199.488 179.709 199.488
+ 179.587 199.488 179.587 c 199.129 179.587 l 198.891 179.466 l 198.648 179.466
+ l 197.93 179.349 l 197.09 179.349 l 196.129 179.228 l 193.969 179.228 l
+ 193.969 180.068 l 196.129 180.068 l 196.969 180.189 l 197.809 180.189 l
+ 198.41 180.306 l 198.891 180.306 l 199.129 180.427 l 199.25 180.427 l 199.129
+ 180.427 l 199.25 180.427 l 199.129 180.306 l 199.012 180.068 l 199.012 
+253.869 l 199.129 253.627 l 199.25 253.509 l 199.129 253.627 l 199.25 253.509
+ l 199.129 253.627 l 198.77 253.627 l 198.531 253.748 l 197.809 253.748 
+l 196.969 253.869 l 193.969 253.869 l f
+0 g
+BT
+4.439998 0 0 4.439998 201.649673 217.507909 Tm
+/f-1-0 1 Tf
+[(P)-9(h)-10(a)-12(s)-11(e)-12( )35(1)]TJ
+ET
+0.0509804 g
+162.531 84.306 m 162.648 74.83 l 161.93 74.83 l 161.809 84.306 l h
+163.609 75.306 m 162.289 70.986 l 160.969 75.306 l f
+0.498039 g
+169.609 108.787 m 169.609 101.466 l 169.25 101.466 l 169.25 108.787 l h
+170.691 101.947 m 169.371 97.627 l 168.168 101.947 l f
+157.609 112.986 m 157.609 101.466 l 157.129 101.466 l 157.129 112.986 l
+ h
+158.691 101.947 m 157.371 97.627 l 156.051 101.947 l f
+0.0509804 g
+152.809 116.83 m 152.809 101.466 l 152.328 101.466 l 152.328 116.83 l h
+153.77 101.947 m 152.57 97.627 l 151.25 101.947 l f
+1 g
+143.211 130.748 51.48 -15.961 re f*
+0.65098 g
+142.969 130.748 m 142.969 130.869 143.09 130.986 143.211 130.986 c 194.691
+ 130.986 l 194.809 130.986 194.93 130.869 194.93 130.748 c 194.93 114.787
+ l 194.93 114.67 194.809 114.548 194.691 114.548 c 143.211 114.548 l 143.09
+ 114.548 142.969 114.67 142.969 114.787 c h
+143.328 114.787 m 143.211 115.029 l 194.691 115.029 l 194.449 114.787 l
+ 194.449 130.748 l 194.691 130.509 l 143.211 130.509 l 143.328 130.748 l
+ f
+1 g
+131.09 127.388 51.602 -17.641 re f*
+0.65098 g
+130.969 127.388 m 130.969 127.509 130.969 127.509 131.09 127.509 c 182.691
+ 127.509 l 182.809 127.509 182.93 127.509 182.93 127.388 c 182.93 109.748
+ l 182.93 109.627 182.809 109.509 182.691 109.509 c 131.09 109.509 l 130.969
+ 109.509 130.969 109.627 130.969 109.748 c h
+131.328 109.748 m 131.09 109.986 l 182.691 109.986 l 182.449 109.748 l 
+182.449 127.388 l 182.691 127.146 l 131.09 127.146 l 131.328 127.388 l f
+1 g
+126.531 124.267 51.598 -17.641 re f*
+0.0509804 g
+126.41 124.267 m 126.41 124.388 126.41 124.388 126.531 124.388 c 178.129
+ 124.388 l 178.25 124.388 178.371 124.388 178.371 124.267 c 178.371 106.627
+ l 178.371 106.509 178.25 106.388 178.129 106.388 c 126.531 106.388 l 126.41
+ 106.388 126.41 106.509 126.41 106.627 c h
+126.77 106.627 m 126.531 106.869 l 178.129 106.869 l 177.891 106.627 l 
+177.891 124.267 l 178.129 124.029 l 126.531 124.029 l 126.77 124.267 l f
+BT
+4.199998 0 0 4.199998 129.649702 116.467949 Tm
+/f-0-0 1 Tf
+[(D)8(i)22(s)-14(c)-15(o)42(v)15(e)41(r)-9( )-123(a)42(n)14(d)13( )-94(s)
+-14(c)-14(o)42(r)-10(e)42( )-65(S)9(V)10(s)]TJ
+0.942857 -1.228571 Td
+[(f)-8(o)42(r)-10( )-36(e)42(a)41(c)-14(h)13( )-93(g)13(r)-10(a)42(p)13
+(h)14( )-65(e)42(d)13(g)13(e)]TJ
+ET
+0.498039 g
+BT
+5.159998 0 0 5.159998 184.00968 122.107947 Tm
+/f-1-0 1 Tf
+(\205)Tj
+ET
+0.0509804 g
+152.809 143.466 m 152.809 127.986 l 152.328 127.986 l 152.328 143.466 l
+ h
+153.77 128.466 m 152.57 124.267 l 151.25 128.466 l f
+0.498039 g
+157.609 142.869 m 157.609 131.466 l 157.129 131.466 l 157.129 142.869 l
+ h
+158.691 131.83 m 157.371 127.627 l 156.051 131.83 l f
+169.609 141.67 m 169.609 134.349 l 169.25 134.349 l 169.25 141.67 l h
+170.691 134.83 m 169.371 130.509 l 168.168 134.83 l f
+1 g
+139.012 151.146 46.078 -12.598 re f*
+0.0509804 g
+138.77 151.146 m 138.77 151.267 138.891 151.388 139.012 151.388 c 185.09
+ 151.388 l 185.211 151.388 185.328 151.267 185.328 151.146 c 185.328 138.548
+ l 185.328 138.427 185.211 138.306 185.09 138.306 c 139.012 138.306 l 138.891
+ 138.306 138.77 138.427 138.77 138.548 c h
+139.25 138.548 m 139.012 138.787 l 185.09 138.787 l 184.852 138.548 l 184.852
+ 151.146 l 185.09 151.029 l 139.012 151.029 l 139.25 151.146 l f
+BT
+4.199998 0 0 4.199998 143.449696 145.987937 Tm
+/f-0-0 1 Tf
+[(P)10(a)42(r)-10(a)42(l)22(l)-7(e)-15(l)-6(i)22(z)-14(e)13( )-151(o)42
+(n)13( )-36(g)13(r)-10(a)42(p)14(h)13( )]TJ
+3.085714 -1.228571 Td
+[(e)42(d)13(g)13(e)42(s)]TJ
+ET
+1 g
+139.012 97.267 46.078 -12.598 re f*
+0.0509804 g
+138.77 97.267 m 138.77 97.388 138.891 97.509 139.012 97.509 c 185.09 97.509
+ l 185.211 97.509 185.328 97.388 185.328 97.267 c 185.328 84.67 l 185.328
+ 84.548 185.211 84.427 185.09 84.427 c 139.012 84.427 l 138.891 84.427 138.77
+ 84.548 138.77 84.67 c h
+139.25 84.67 m 139.012 84.908 l 185.09 84.908 l 184.852 84.67 l 184.852
+ 97.267 l 185.09 97.146 l 139.012 97.146 l 139.25 97.267 l f
+BT
+4.199998 0 0 4.199998 150.889693 89.46796 Tm
+/f-0-0 1 Tf
+[(M)4(e)42(r)-10(g)14(e)42( )-123(C)8(a)42(l)22(l)23(s)]TJ
+ET
+0 g
+BT
+4.439998 0 0 4.439998 103.249712 116.347949 Tm
+/f-1-0 1 Tf
+[(P)-9(h)-10(a)-12(s)-11(e)-12( )35(2)]TJ
+ET
+1 g
+129.77 53.107 m 134.211 70.986 l 194.809 70.986 l 190.371 53.107 l f*
+0.0509804 g
+129.77 52.627 m 129.648 52.627 129.531 52.748 129.41 52.869 c 129.289 52.986
+ 129.289 53.107 129.289 53.228 c 133.852 71.107 l 133.852 71.349 134.09 
+71.466 134.211 71.466 c 194.809 71.466 l 195.051 71.466 195.168 71.466 195.289
+ 71.349 c 195.289 71.228 195.41 70.986 195.289 70.869 c 190.852 53.107 l
+ 190.852 52.869 190.609 52.627 190.371 52.627 c h
+190.371 53.587 m 189.891 53.228 l 194.449 71.107 l 194.809 70.509 l 134.211
+ 70.509 l 134.691 70.869 l 130.25 53.107 l 129.77 53.587 l f
+0 g
+BT
+5.159998 0 0 5.159998 141.169697 63.30797 Tm
+/f-0-0 1 Tf
+[(S)-7(V)-8( )22(an)-4(d )43(In)-3(de)-4(l)-243(Cal)-11(l)-10(s)11( )]TJ
+2.511628 -1.186047 Td
+[(\()7(V)-7(CF)7(s)-11(\))]TJ
+ET
+0.34902 g
+127.609 154.748 m 124.488 154.748 l 123.648 154.627 l 122.93 154.627 l 
+122.691 154.509 l 122.211 154.509 l 122.09 154.388 l 121.969 154.388 l 121.852
+ 154.267 l 121.73 154.146 l 121.73 154.029 121.609 154.029 121.609 153.908
+ c 121.609 78.306 l 121.609 78.189 121.73 78.189 121.73 78.068 c 121.969
+ 77.83 l 122.211 77.83 l 122.449 77.709 l 122.93 77.709 l 123.648 77.587
+ l 124.488 77.466 l 127.609 77.466 l 127.609 78.306 l 125.449 78.306 l 124.488
+ 78.427 l 123.77 78.427 l 123.051 78.548 l 122.449 78.548 l 122.328 78.67
+ l 122.449 78.67 l 122.328 78.67 l 122.449 78.548 l 122.449 78.306 l 122.449
+ 153.908 l 122.449 153.67 l 122.328 153.548 l 122.449 153.67 l 122.328 153.548
+ l 122.449 153.67 l 122.809 153.67 l 123.051 153.787 l 123.77 153.787 l 
+124.488 153.908 l 127.609 153.908 l f
+0.0509804 g
+BT
+4.679998 0 0 4.679998 10.969749 227.827905 Tm
+/f-0-0 1 Tf
+[(C)4(o)-8(n)-8(s)13(t)-4(ru)-8(c)-13(t)-4( )47(b)-8(re)-9(a)-8(k)-12(e)
+-8(n)-8(d)-213(g)-8(ra)-9(p)-7(h)-8( )]TJ
+-0.025641 -1.205128 Td
+[(f)-30(o)-8(r )47(e)-8(a)-8(c)-13(h)-8( )47(g)-8(e)-8(n)-8(o)-7(m)-14(e)
+-8( )73(s)13(e)-8(g)-8(m)-13(e)-8(n)-8(t)]TJ
+ET
+0.65098 g
+126.77 223.029 m 74.211 235.509 l 74.328 235.869 l 126.891 223.509 l f
+74.57 61.509 m 127.012 205.509 l 126.648 205.627 l 74.09 61.627 l f
+0.25098 g
+274.488 194.349 m 274.609 185.83 l 274.129 185.83 l 274.129 194.349 l h
+275.57 186.306 m 274.371 182.107 l 273.051 186.306 l f
+1 g
+247.609 190.869 m 251.09 205.029 l 301.129 205.029 l 297.648 190.869 l f*
+0.25098 g
+247.609 190.627 m 247.488 190.627 247.371 190.627 247.371 190.748 c 247.25
+ 190.748 247.25 190.869 247.25 190.986 c 250.852 205.029 l 250.852 205.146
+ 250.969 205.267 251.09 205.267 c 301.129 205.267 l 301.25 205.267 301.371
+ 205.267 301.371 205.146 c 301.488 205.146 301.488 205.029 301.488 204.908
+ c 297.891 190.869 l 297.891 190.627 297.77 190.627 297.648 190.627 c h
+297.648 191.228 m 297.289 190.986 l 300.891 205.029 l 301.129 204.67 l 
+251.09 204.67 l 251.449 204.908 l 247.969 190.869 l 247.609 191.228 l f
+BT
+4.679998 0 0 4.679998 265.249648 199.027916 Tm
+/f-0-0 1 Tf
+[(G)9(e)-8(n)-8(o)-8(m)-13(e)-8( )]TJ
+-1.74359 -1.205128 Td
+[(Bre)-8(a)-8(k)-13(e)-8(n)-8(d)-238(G)8(ra)-8(p)-8(h)]TJ
+ET
+1 g
+251.328 182.107 46.082 -12.719 re f*
+0.25098 g
+251.09 182.107 m 251.09 182.228 251.211 182.228 251.328 182.228 c 297.41
+ 182.228 l 297.531 182.228 297.648 182.228 297.648 182.107 c 297.648 169.388
+ l 297.648 169.267 297.531 169.146 297.41 169.146 c 251.328 169.146 l 251.211
+ 169.146 251.09 169.267 251.09 169.388 c h
+251.57 169.388 m 251.328 169.627 l 297.41 169.627 l 297.168 169.388 l 297.168
+ 182.107 l 297.41 181.869 l 251.328 181.869 l 251.57 182.107 l f
+BT
+4.199998 0 0 4.199998 255.649651 174.307926 Tm
+/f-0-0 1 Tf
+[(S)10(e)42(l)22(e)42(c)-15(t)-7( )-123(a)42( )-36(g)13(r)-10(a)42(p)13
+(h)14( )-65(e)42(d)13(g)13(e)]TJ
+ET
+1 g
+244.73 7.267 m 248.328 21.306 l 304.012 21.306 l 300.41 7.267 l f*
+0.25098 g
+244.73 6.908 m 244.73 6.908 244.609 6.908 244.488 7.029 c 244.488 7.267
+ l 247.969 21.427 l 247.969 21.548 248.09 21.67 248.328 21.67 c 304.012 
+21.67 l 304.129 21.67 304.129 21.548 304.25 21.548 c 304.25 21.427 304.371
+ 21.306 304.25 21.189 c 300.77 7.146 l 300.77 7.029 300.648 6.908 300.41
+ 6.908 c h
+300.41 7.509 m 300.168 7.267 l 303.648 21.427 l 304.012 20.947 l 248.328
+ 20.947 l 248.57 21.189 l 245.09 7.146 l 244.73 7.509 l f
+BT
+4.679998 0 0 4.679998 255.049652 15.42799 Tm
+/f-0-0 1 Tf
+[(SV )-4(a)-8(n)-7(d)-8( )21(I)-4(n)-8(d)-8(e)-8(l)-239(C)4(a)-8(l)-9(l)
+-8(s)12( )]TJ
+0.282051 -1.205128 Td
+[(f)-30(o)-8(r )47(a)-8( )21(G)9(ra)-8(p)-8(h)-8( )21(Ed)-7(g)-8(e)]TJ
+ET
+1 g
+251.328 162.068 46.082 -18.961 re f*
+0.25098 g
+251.09 162.068 m 251.09 162.189 251.211 162.306 251.328 162.306 c 297.41
+ 162.306 l 297.531 162.306 297.648 162.189 297.648 162.068 c 297.648 143.107
+ l 297.648 142.986 297.531 142.869 297.41 142.869 c 251.328 142.869 l 251.211
+ 142.869 251.09 142.986 251.09 143.107 c h
+251.57 143.107 m 251.328 143.349 l 297.41 143.349 l 297.168 143.107 l 297.168
+ 162.068 l 297.41 161.83 l 251.328 161.83 l 251.57 162.068 l f
+BT
+4.199998 0 0 4.199998 259.72965 156.187933 Tm
+/f-0-0 1 Tf
+[(S)10(e)42(l)22(e)42(c)-15(t)-7( )-123(e)42(v)15(i)22(d)13(e)42(n)13(c)
+-43(e)42( )]TJ
+-0.514286 -1.228571 Td
+[(r)-10(e)42(a)42(d)13(s)-14( )-94(f)-8(r)-9(o)41(m)5( )-37(e)42(d)14(g)
+13(e)42(\222)-7(s)-14( )]TJ
+0.428571 -1.2 Td
+[(g)13(e)42(n)13(o)42(m)-24(e)42( )-151(r)-10(e)42(g)14(i)22(o)42(n)13(s)
+]TJ
+ET
+1 g
+251.328 135.787 46.082 -18.957 re f*
+0.25098 g
+251.09 135.787 m 251.09 135.908 251.211 136.029 251.328 136.029 c 297.41
+ 136.029 l 297.531 136.029 297.648 135.908 297.648 135.787 c 297.648 116.83
+ l 297.648 116.709 297.531 116.587 297.41 116.587 c 251.328 116.587 l 251.211
+ 116.587 251.09 116.709 251.09 116.83 c h
+251.57 116.83 m 251.328 117.068 l 297.41 117.068 l 297.168 116.83 l 297.168
+ 135.787 l 297.41 135.548 l 251.328 135.548 l 251.57 135.787 l f
+BT
+4.199998 0 0 4.199998 256.729651 129.907944 Tm
+/f-0-0 1 Tf
+[(T)11(r)-10(a)42(n)13(s)-14(l)22(a)42(t)-37(e)42( )-150(e)41(v)15(i)22
+(d)13(e)42(n)13(c)-43(e)42( )]TJ
+1.571429 -1.228571 Td
+[(r)-10(e)42(a)42(d)13(s)-14( )-94(t)-8(o)42( )-36(S)10(V)10( )]TJ
+0.257143 -1.2 Td
+[(c)-14(a)42(n)13(d)13(i)22(d)14(a)41(t)-36(e)42(s)]TJ
+ET
+1 g
+251.328 109.509 46.082 -14.281 re f*
+0.25098 g
+251.09 109.509 m 251.09 109.627 251.211 109.748 251.328 109.748 c 297.41
+ 109.748 l 297.531 109.748 297.648 109.627 297.648 109.509 c 297.648 95.228
+ l 297.648 95.107 297.531 94.986 297.41 94.986 c 251.328 94.986 l 251.211
+ 94.986 251.09 95.107 251.09 95.228 c h
+251.57 95.228 m 251.328 95.349 l 297.41 95.349 l 297.168 95.228 l 297.168
+ 109.509 l 297.41 109.267 l 251.328 109.267 l 251.57 109.509 l f
+BT
+4.199998 0 0 4.199998 256.129651 103.387954 Tm
+/f-0-0 1 Tf
+[(A)10(s)-14(s)-15(e)42(m)5(b)13(l)22(e)42( )-151(r)-10(e)42(a)42(d)13(s)
+-14( )-94(f)-7(o)41(r)-9( )]TJ
+0.171429 -1.228571 Td
+[(e)42(a)42(c)-15(h)14( )-94(S)10(V)10( )-37(c)-14(a)42(n)13(d)14(i)22(d)
+13(a)42(t)-37(e)]TJ
+ET
+1 g
+246.531 72.427 m 274.371 87.068 l 302.211 72.427 l 274.371 57.787 l f*
+0.25098 g
+246.531 72.306 m 246.41 72.306 246.41 72.427 246.41 72.427 c 246.41 72.548
+ 246.41 72.548 246.531 72.67 c 274.25 87.306 l 274.488 87.306 l 302.328 
+72.67 l 302.328 72.548 302.449 72.548 302.449 72.427 c 302.328 72.306 l 
+274.488 57.67 l 274.371 57.548 274.371 57.548 274.25 57.67 c h
+274.488 58.029 m 274.25 58.029 l 302.09 72.67 l 302.09 72.306 l 274.25 
+86.947 l 274.488 86.947 l 246.648 72.306 l 246.648 72.67 l f
+BT
+4.199998 0 0 4.199998 266.569647 78.667964 Tm
+/f-0-0 1 Tf
+[(I)21(s)-15( )-7(c)-15(o)42(n)13(t)-7(i)22(g)]TJ
+-0.228571 -1.228571 Td
+[(a)42(s)-14(s)-15(e)42(m)5(b)13(l)22(y)14( )]TJ
+-0.257143 -1.2 Td
+[(s)-14(u)13(c)-14(c)-15(e)42(s)-14(s)-14(f)-8(u)13(l)]TJ
+2.057143 -1.228571 Td
+(?)Tj
+ET
+1 g
+246.168 45.427 56.402 -14.398 re f*
+0.25098 g
+245.93 45.427 m 245.93 45.548 246.051 45.67 246.168 45.67 c 302.57 45.67
+ l 302.691 45.67 302.809 45.548 302.809 45.427 c 302.809 31.029 l 302.809
+ 31.029 302.691 30.908 302.57 30.908 c 246.168 30.908 l 246.051 30.908 245.93
+ 31.029 245.93 31.029 c h
+246.41 31.029 m 246.168 31.267 l 302.57 31.267 l 302.328 31.029 l 302.328
+ 45.427 l 302.57 45.189 l 246.168 45.189 l 246.41 45.427 l f
+BT
+4.199998 0 0 4.199998 250.729653 36.787981 Tm
+/f-0-0 1 Tf
+[(S)10(c)-14(o)41(r)-9(e)41( )-93(e)42(a)42(c)-15(h)14( )-94(S)10(V)10( )
+-8(c)-14(a)41(n)14(d)13(i)22(d)13(a)42(t)-36(e)]TJ
+ET
+274.609 57.787 m 274.609 49.267 l 274.129 49.267 l 274.129 57.787 l h
+275.691 49.627 m 274.371 45.427 l 273.051 49.627 l f
+BT
+3.839998 0 0 3.839998 258.76965 52.387975 Tm
+/f-0-0 1 Tf
+[(P)11(r)-11(e)25(ci)3(se)25( )]TJ
+-1.4375 -1.1875 Td
+[(ca)25(n)-6(d)24(i)4(d)25(a)25(t)-4(e)25(s)]TJ
+9.53125 1.1875 Td
+[(I)-3(m)-11(p)25(r)-11(e)25(ci)3(se)25( )]TJ
+-0.4375 -1.1875 Td
+[(ca)25(n)-6(d)24(i)4(d)25(a)25(t)-4(e)25(s)]TJ
+4.199998 0 0 4.199998 275.809643 53.227974 Tm
+/f-1-0 1 Tf
+(Y)Tj
+6.342857 3.428571 Td
+(N)Tj
+ET
+302.211 72.67 m 307.609 72.67 l 307.73 72.67 307.852 72.548 307.852 72.427
+ c 307.852 38.228 l 307.852 38.107 307.73 37.986 307.609 37.986 c 306.41
+ 37.986 l 306.41 38.466 l 307.609 38.466 l 307.488 38.228 l 307.488 72.427
+ l 307.609 72.189 l 302.211 72.189 l h
+306.77 37.029 m 302.57 38.228 l 306.77 39.548 l f
+274.609 169.388 m 274.609 165.908 l 274.129 165.908 l 274.129 169.388 l
+ h
+275.691 166.388 m 274.371 162.068 l 273.051 166.388 l f
+274.609 143.107 m 274.609 139.627 l 274.129 139.627 l 274.129 143.107 l
+ h
+275.691 140.107 m 274.371 135.787 l 273.051 140.107 l f
+274.609 116.83 m 274.609 113.349 l 274.129 113.349 l 274.129 116.83 l h
+275.691 113.83 m 274.371 109.509 l 273.051 113.83 l f
+274.609 95.228 m 274.609 90.908 l 274.129 90.908 l 274.129 95.228 l h
+275.691 91.267 m 274.371 87.068 l 273.051 91.267 l f
+274.609 31.029 m 274.609 25.146 l 274.129 25.146 l 274.129 31.029 l h
+275.691 25.627 m 274.371 21.306 l 273.051 25.627 l f
+0.0509804 g
+BT
+4.679998 0 0 4.679998 247.849655 219.427908 Tm
+/f-0-0 1 Tf
+[(D)4(i)-8(s)12(c)-12(o)-8(v)12(e)-7(r )21(a)-8(n)-8(d)-8( )21(s)13(c)-13
+(o)-8(re)-8( )21(SVs)14( )-4(f)-30(o)-8(r )]TJ
+2.307692 -1.205128 Td
+[(e)-8(a)-8(c)-13(h)-8( )47(g)-8(ra)-8(p)-8(h)-8( )47(e)-8(d)-8(g)-8(e)
+]TJ
+ET
+0.65098 g
+194.809 130.627 m 238.371 227.466 l 238.012 227.587 l 194.449 130.869 l
+ f
+238.012 1.267 m 194.449 114.67 l 194.93 114.787 l 238.371 1.388 l f
+0.25098 g
+37.488 92.587 m 37.488 85.267 l 37.129 85.267 l 37.129 92.587 l h
+38.57 85.748 m 37.371 81.427 l 36.051 85.748 l f
+37.371 197.228 m 37.488 189.908 l 37.012 189.908 l 37.012 197.228 l h
+38.449 190.388 m 37.25 186.189 l 35.93 190.388 l f
+1 g
+8.691 67.627 m 12.289 81.67 l 65.809 81.67 l 62.211 67.627 l f*
+0.25098 g
+8.691 67.267 m 8.691 67.267 8.57 67.388 8.449 67.388 c 8.449 67.748 l 11.93
+ 81.787 l 11.93 81.908 12.168 82.029 12.289 82.029 c 65.809 82.029 l 65.93
+ 82.029 65.93 82.029 66.051 81.908 c 66.051 81.67 l 62.57 67.509 l 62.57
+ 67.388 62.449 67.267 62.211 67.267 c h
+62.211 67.986 m 61.969 67.748 l 65.449 81.787 l 65.809 81.427 l 12.289 
+81.427 l 12.531 81.67 l 9.051 67.509 l 8.691 67.986 l f
+BT
+4.679998 0 0 4.679998 27.649743 75.787965 Tm
+/f-0-0 1 Tf
+[(Se)-8(g)-8(m)-13(e)-8(n)-8(t)-4( )]TJ
+-1.641026 -1.205128 Td
+[(Bre)-8(a)-8(k)-13(e)-8(n)-8(d)-238(G)8(ra)-8(p)-8(h)]TJ
+ET
+1 g
+14.211 102.787 46.078 -12.598 re f*
+0.25098 g
+13.969 102.787 m 13.969 102.908 14.09 103.029 14.211 103.029 c 60.289 103.029
+ l 60.41 103.029 60.531 102.908 60.531 102.787 c 60.531 90.189 l 60.531 
+90.068 60.41 89.947 60.289 89.947 c 14.211 89.947 l 14.09 89.947 13.969 
+90.068 13.969 90.189 c h
+14.449 90.189 m 14.211 90.306 l 60.289 90.306 l 60.051 90.189 l 60.051 
+102.787 l 60.289 102.548 l 14.211 102.548 l 14.449 102.787 l f
+BT
+4.199998 0 0 4.199998 20.209746 97.507957 Tm
+/f-0-0 1 Tf
+[(D)8(e)42(-)-10(n)13(o)42(i)22(s)-43(e)42( )-151(s)-14(e)42(g)13(m)5(e)
+42(n)13(t)-8( )]TJ
+0.571429 -1.228571 Td
+[(b)13(r)-10(e)42(a)42(k)-14(e)42(n)-16(d)-386(g)13(r)-10(a)42(p)13(h)]TJ
+ET
+37.488 113.466 m 37.488 106.146 l 37.129 106.146 l 37.129 113.466 l h
+38.57 106.627 m 37.371 102.427 l 36.051 106.627 l f
+1 g
+9.41 127.267 55.801 -16.801 re f*
+0.25098 g
+9.168 127.267 m 9.168 127.388 9.289 127.509 9.41 127.509 c 65.211 127.509
+ l 65.328 127.509 65.328 127.388 65.328 127.267 c 65.328 110.466 l 65.328
+ 110.349 65.328 110.228 65.211 110.228 c 9.41 110.228 l 9.289 110.228 9.168
+ 110.349 9.168 110.466 c h
+9.531 110.466 m 9.41 110.709 l 65.211 110.709 l 64.969 110.466 l 64.969
+ 127.267 l 65.211 127.029 l 9.41 127.029 l 9.531 127.267 l f
+BT
+4.199998 0 0 4.199998 17.329747 122.467947 Tm
+/f-0-0 1 Tf
+[(M)4(e)42(r)-10(g)14(e)42( )-123(r)-9(e)41(a)42(d)14( )-94(b)13(r)-10(e)
+42(a)42(k)-14(e)42(n)13(d)]TJ
+0.485714 -1.228571 Td
+[(g)13(r)-10(a)42(p)14(h)13( )-94(i)22(n)14(t)-8(o)42( )-65(s)-15(e)42(g)
+14(m)4(e)42(n)13(t)-8( )]TJ
+0.771429 -1.2 Td
+[(b)13(r)-10(e)42(a)42(k)-14(e)42(n)-16(d)-386(g)13(r)-10(a)42(p)13(h)14
+( )]TJ
+ET
+37.488 138.068 m 37.488 130.869 l 37.129 130.869 l 37.129 138.068 l h
+38.57 131.349 m 37.371 127.029 l 36.051 131.349 l f
+1 g
+9.41 146.709 55.801 -12.723 re f*
+0.25098 g
+9.168 146.709 m 9.168 146.83 9.289 146.83 9.41 146.83 c 65.211 146.83 l
+ 65.328 146.83 65.328 146.83 65.328 146.709 c 65.328 133.986 l 65.328 133.869
+ 65.328 133.869 65.211 133.869 c 9.41 133.869 l 9.289 133.869 9.168 133.869
+ 9.168 133.986 c h
+9.531 133.986 m 9.41 134.228 l 65.211 134.228 l 64.969 133.986 l 64.969
+ 146.709 l 65.211 146.466 l 9.41 146.466 l 9.531 146.709 l f
+BT
+4.199998 0 0 4.199998 14.809748 141.427939 Tm
+/f-0-0 1 Tf
+[(T)11(r)-10(a)42(n)13(s)-14(l)22(a)42(t)-37(e)42( )-150(e)41(v)15(i)22
+(d)13(e)42(n)13(c)-43(e)42( )-150(r)-10(e)42(a)41(d)14( )]TJ
+-0.228571 -1.228571 Td
+[(i)22(n)13(t)-7(o)41( )-65(r)-9(e)41(a)42(d)14( )-123(b)14(r)-10(e)42(a)
+42(k)-15(e)42(n)13(d)-415(g)13(r)-9(a)42(p)13(h)]TJ
+ET
+37.488 157.388 m 37.488 150.189 l 37.129 150.189 l 37.129 157.388 l h
+38.57 150.548 m 37.371 146.349 l 36.051 150.548 l f
+1 g
+14.211 166.388 46.078 -12.602 re f*
+0.25098 g
+13.969 166.388 m 13.969 166.509 14.09 166.627 14.211 166.627 c 60.289 166.627
+ l 60.41 166.627 60.531 166.509 60.531 166.388 c 60.531 153.787 l 60.531
+ 153.67 60.41 153.548 60.289 153.548 c 14.211 153.548 l 14.09 153.548 13.969
+ 153.67 13.969 153.787 c h
+14.449 153.787 m 14.211 154.029 l 60.289 154.029 l 60.051 153.787 l 60.051
+ 166.388 l 60.289 166.146 l 14.211 166.146 l 14.449 166.388 l f
+BT
+4.199998 0 0 4.199998 22.009745 161.107931 Tm
+/f-0-0 1 Tf
+[(F)11(i)22(l)22(t)-8(e)42(r)-10( )-122(f)-8(o)42(r)-10( )-8(S)10(V)10( )
+-36(a)42(n)13(d)13( )]TJ
+-0.971429 -1.228571 Td
+[(i)22(n)13(d)14(e)42(l)-378(e)42(v)14(i)22(d)13(e)14(n)13(c)-43(e)42( )
+-151(r)-10(e)42(a)42(d)13(s)]TJ
+ET
+37.488 177.306 m 37.488 170.107 l 37.129 170.107 l 37.129 177.306 l h
+38.57 170.466 m 37.371 166.267 l 36.051 170.466 l f
+1 g
+14.211 186.189 46.078 -12.723 re f*
+0.25098 g
+13.969 186.189 m 13.969 186.306 14.09 186.306 14.211 186.306 c 60.289 186.306
+ l 60.41 186.306 60.531 186.306 60.531 186.189 c 60.531 173.466 l 60.531
+ 173.349 60.41 173.228 60.289 173.228 c 14.211 173.228 l 14.09 173.228 13.969
+ 173.349 13.969 173.466 c h
+14.449 173.466 m 14.211 173.709 l 60.289 173.709 l 60.051 173.466 l 60.051
+ 186.189 l 60.289 185.947 l 14.211 185.947 l 14.449 186.189 l f
+BT
+4.199998 0 0 4.199998 23.329744 180.907923 Tm
+/f-0-0 1 Tf
+[(S)10(e)42(l)22(e)42(c)-15(t)-7( )-123(r)-9(e)41(a)42(d)14(s)-15( )-65
+(i)22(n)14( )]TJ
+-0.542857 -1.228571 Td
+[(g)13(e)42(n)13(o)42(m)-24(e)42( )-151(s)-14(e)42(g)13(m)5(e)41(n)14(t)
+]TJ
+ET
+1 g
+8.809 195.306 m 12.41 209.466 l 65.93 209.466 l 62.328 195.306 l f*
+0.25098 g
+8.809 195.068 m 8.809 195.068 8.691 195.068 8.57 195.189 c 8.57 195.427
+ l 12.051 209.466 l 12.051 209.587 12.168 209.709 12.41 209.709 c 65.93 
+209.709 l 65.93 209.709 66.051 209.709 66.168 209.587 c 66.168 209.349 l
+ 62.691 195.306 l 62.691 195.068 62.57 195.068 62.328 195.068 c h
+62.328 195.67 m 62.09 195.427 l 65.57 209.466 l 65.93 209.107 l 12.41 209.107
+ l 12.648 209.349 l 9.168 195.306 l 8.809 195.67 l f
+BT
+4.199998 0 0 4.199998 19.369746 203.467914 Tm
+/f-0-0 1 Tf
+[(S)10(a)42(m)4(p)13(l)-6(e)42( )-122(A)10(l)22(i)22(g)13(n)14(m)-25(e)
+42(n)14(t)-8(s)]TJ
+2.6 -1.228571 Td
+[(\()-10(B)10(A)10(M)4(s)-14(\))]TJ
+ET
+Q Q
+showpage
+%%Trailer
+end restore
+%%EOF
diff --git a/docs/methods/primary/makepdf.bash b/docs/methods/primary/makepdf.bash
new file mode 100755
index 0000000..57c4ced
--- /dev/null
+++ b/docs/methods/primary/makepdf.bash
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Build the methods PDF from the LaTeX sources located next to this script.
+set -o nounset
+
+# Print the physical absolute path of directory $1.
+rel2abs() {
+    (cd "$1" && pwd -P)
+}
+
+docdir=$(rel2abs "$(dirname "$0")") || exit 1
+builddir=$docdir/build
+
+mkdir -p "$builddir"
+
+mname=methods
+# Run latex, bibtex, then latex twice more on document $1, then dvipdf.
+do_latex_cmds() {
+  local file=$1
+  latex "$file"
+  bibtex "$file"
+  latex "$file"
+  latex "$file"
+  dvipdf "$file"
+}
+# Each document is built in a subshell so the directory change stays local.
+for mm in $mname; do
+(
+cd "$builddir" || exit 1  # otherwise ln/rm below would hit the caller's directory
+cp ../packages/* .
+ln -sf "$docdir/$mm.tex"
+ln -sf "$docdir/$mname.bib"
+ln -sf "$docdir/figures"
+do_latex_cmds "$mm"
+mv "$mm.pdf" "$docdir"
+mv "$mm.log" "$docdir"
+rm -f "$mm".*
+)
+done
+
+
diff --git a/docs/methods/primary/methods.bib b/docs/methods/primary/methods.bib
new file mode 100644
index 0000000..d7c22d1
--- /dev/null
+++ b/docs/methods/primary/methods.bib
@@ -0,0 +1,53 @@
+
+% 11504945 
+@Article{pevzner2001,
+   Author="Pevzner, P. A.  and Tang, H.  and Waterman, M. S. ",
+   Title="{{A}n {E}ulerian path approach to {D}{N}{A} fragment assembly}",
+   Journal="Proc. Natl. Acad. Sci. U.S.A.",
+   Year="2001",
+   Volume="98",
+   Number="17",
+   Pages="9748--9753",
+   Month="Aug"
+}
+
+
+% 7265238 
+@Article{smith1981,
+   Author="Smith, T. F.  and Waterman, M. S. ",
+   Title="{{I}dentification of common molecular subsequences}",
+   Journal="J. Mol. Biol.",
+   Year="1981",
+   Volume="147",
+   Number="1",
+   Pages="195--197",
+   Month="Mar"
+}
+
+
+% 7166760 
+@Article{gotoh1982,
+   Author="Gotoh, O. ",
+   Title="{{A}n improved algorithm for matching biological sequences}",
+   Journal="J. Mol. Biol.",
+   Year="1982",
+   Volume="162",
+   Number="3",
+   Pages="705--708",
+   Month="Dec"
+}
+
+
+% 22581179 
+@Article{strelka2012,
+   Author="Saunders, C. T.  and Wong, W. S.  and Swamy, S.  and Becq, J.  and Murray, L. J.  and Cheetham, R. K. ",
+   Title="{{S}trelka: accurate somatic small-variant calling from sequenced tumor-normal sample pairs}",
+   Journal="Bioinformatics",
+   Year="2012",
+   Volume="28",
+   Number="14",
+   Pages="1811--1817",
+   Month="Jul"
+}
+
+
diff --git a/docs/methods/primary/methods.tex b/docs/methods/primary/methods.tex
new file mode 100644
index 0000000..a6d2e0f
--- /dev/null
+++ b/docs/methods/primary/methods.tex
@@ -0,0 +1,519 @@
+\documentclass{article}
+
+\usepackage{natbib}
+
+% for equation*
+\usepackage{amsmath}
+
+% for scalebox,...
+\usepackage{graphics}
+
+% hide hyperref links  with pdfborder (more portable than hidelinks option)
+\usepackage[pdfborder={0 0 0}]{hyperref}
+
+% for pseudocode
+\usepackage{algorithm}
+\usepackage[noend]{algpseudocode}
+
+
+\title{'In-source Methods for Manta Structural Variant and Indel Caller'}
+
+
+% simple scientific notation:
+\newcommand{\e}[1]{\ensuremath{\times 10^{#1}}}
+
+\begin{document}
+
+\maketitle
+
+\tableofcontents
+
+\section{Overview}
+
+On any release branch, the methods described here should reflect the default implementation in the source repository containing this document.
+
+\section{Methods}
+
+\subsection{Manta Workflow Overview}
+
+Manta's structural variant calling workflow contains two major phases. In the first phase, the genome is scanned to build a genome-wide breakend graph. In the second phase, edges of this graph are analyzed to generate SV candidates, each of which is assembled, scored according to diploid or somatic quality models and reported. A high-level schematic of the workflow is described in Figure \ref{fig:workflow} below.
+
+\begin{figure}[h]
+\centerline{
+  \scalebox{1.2}{
+    \includegraphics{figures/workflow.eps}
+  }
+}
+\caption{High-level processing steps and parallelization strategy in Manta's variant calling workflow.}
+\label{fig:workflow}
+\end{figure}
+
+For the breakend graph created in phase one, each node represents a region of the genome where evidence for at least one breakend exists. Edges represent evidence of a junction between two breakends. By design, the edges in the breakend graph do not correspond to specific SV candidate hypotheses; instead they represent evidence of potentially many junctions between regions. This lack of specificity allows the graph for all variant types in the genome to be compactly represented in a sm [...]
+
+Phase two of Manta's workflow occurs after the complete breakend graph is created. Given the complete breakend graph, sets of graph edges are approximated as independent structural variant discovery problems and each of these sets is analyzed in parallel. Each structural variant discovery process starts from one (or more) edges from the breakend graph, discovers any structural variant candidates on its edge (or edges) and executes assembly, scoring and reporting for each candidate variant.
+
+Manta's two phase structure enables a high level of parallelization. In the first phase a set of independent tasks generate subsets of the breakend graph corresponding to small segments of the genome (Manta's default segment size is 12 Megabases). At the end of this phase there is a final step to merge all genome segment graphs into a single graph for the whole genome. In the next phase the genome breakend graph edges are divided among a set of independent SV discovery processes. The out [...]
+
+\subsection{Fragment size and depth statistics}
+
+Before entering the first major phase of the workflow, Manta completes a rapid estimation of fragment size and read depth information. During this step the sequence fragment size distribution is estimated for each BAM file provided as input. To accomplish this efficiently, the size distribution is sampled until convergence, so that only a small subset of the input sequence alignments for each sample need to be scanned. To reduce regional bias, this step cycles through several small secti [...]
+
+A related prerequisite step Manta performs is estimation of the sequencing depth for each chromosome. For somatic analysis this depth is only computed for the normal sample. This information is used downstream to filter out high-depth regions (details below) when Manta is run in its default whole genome sequencing mode; these values can be ignored for exome or other targeted analyses.
+
+\subsubsection{Chromosome depth estimation}
+
+For each chromosome, depth is estimated using a modified median calculation. As a first step, each chromosome is partitioned into segments of similar size to ensure the estimation process samples several chromosome regions. The chromosome is divided into the smallest number of segments no larger than $S_{max}$, where all segments have nearly equal size (all sizes must be $S$ or $S+1$, given smallest segment size of $S$). If this procedure results in more than 20 segments, then $S_{max}$ is  [...]
+
+The depth estimation procedure repeatedly cycles through all segments. Within each segment, at least 40,000 reads are scanned before moving to the next segment (additional reads are scanned until the mapping position changes). After the target number of reads have been scanned from every segment in the chromosome, the procedure returns to the first position and repeats this procedure starting from the last unscanned position. The process repeats until all reads in all segments are sc [...]
+
+Each scanned read is filtered if it is unmapped. Otherwise the read alignment is ignored and the read is applied to a simple depth pileup assuming a complete and ungapped alignment starting from the mapping position. Every 1M reads triggers a convergence check, but only after every chromosome segment has been sampled at least once.
+
+Depth is estimated from the resulting pileup, using a simple median over all depth observations from the pileup structure excluding zero counts. Convergence is checked between the depth estimate of the last convergence check and the current one. An absolute change of less than 0.05 is treated as converged (or given the median case, the integer median estimates must be an exact match).
+
+The depth estimation procedure is run separately for each non-tumor sample, and all high-depth thresholds are set based on the sum of depth estimates over these samples.
+
+\subsection{Manta Workflow Phase One: Breakend graph construction}
+
+\subsubsection{Breakend graph structure}
+
+The breakend graph $G$ describes regions of the genome which are connected by one or more possible breakend junctions. The graph, $G = (V, E)$, comprises a set of nodes $V$ and a set of directed edges $E$. Each node $v \in V$ is annotated with a contiguous genome segment containing one or more breakends. Each edge $e \in E$ connects nodes where evidence of a breakend junction exists. Edge direction is set from the node where a direct read mapping observation is observed to the node where [...]
+
+This graph is designed to contain only the information required to partition the SV discovery problem into a set of independent SV discovery processes in the second workflow phase. It does not provide sufficient detail to support candidate discovery on its own. According to this design the breakend graph for full human genomes can be represented in a small memory footprint. In practice, a case has yet to be found where the graph (and its indexing structures) require greater than 2Gb, inc [...]
+
+\subsubsection{Genome segment distribution}
+
+The breakend graph is constructed by first computing subgraphs corresponding to individual segments of the genome. Graph construction is run in parallel over a set of disjoint segments covering the entire genome, followed by a graph merging step. Given a maximum segment size (default is 12 Mb), each chromosome is partitioned into a set of nearly equal size segments without exceeding the maximum segment size, and one subgraph construction process is started for each.
+
+\subsubsection{Genome segment breakend graph construction}
+
+For each genome segment, input BAM file(s) are scanned over the corresponding genome region for breakend associated reads, which are merged into the segment's breakend graph. To do so, each mapped read in the genome segment must first pass through a filtration process to eliminate noise. Any remaining reads are then translated into a set of small breakend graphs (currently each of these graphs is a single edge connecting either one or two nodes). These graphs are merged into the segment' [...]
+
+\paragraph{Filtration}
+For graph construction, reads are removed if they are marked as secondary, filtered or PCR duplicates. Reads also must be mapped and have a mapping quality of 15 or greater. A read pair is determined to be anomalous if it has (1) unexpected read orientation, (2) a mate mapping to another chromosome, or (3) an unexpectedly large fragment size. For the fragment size to be considered anomalous at the initial filtration stage, it must be greater than $1.5s_{max}$, where $s_{max}$ is the 99th [...]
+
+\paragraph{Read pair translation to breakend graph set}
+Each unfiltered read pair is translated into a set of breakend graphs via a two step process. The read pair is first translated to a set of SV candidates. The translation process allows for the possibility that the BAM record for only one read of the pair is available, in which case the length and alignment of the remote mate read is approximated. The logic to translate read pairs to SV candidates is the same as that used in SV hypothesis generation during Manta's second phase.
+
+The translation of each read (or read pair) into a set of SV candidates proceeds as follows:
+\begin{itemize}
+\item \textit{Anomalous Read Pairs} We create an SV candidate for any read pair with an unexpected orientation, chimeric mapping or anomalously large fragment size. To determine large fragment size we find $r_p$, the extent of the read pair in reference coordinates, and accept a read pair if $r_p > s_{max} + c_{min}$, where $c_{min}$ is the minimum candidate indel size. If accepted, the candidate breakend regions extend from the end of each read to length $\max(40, s_{br} - r_1 - r_2)$,  [...]
+
+\begin{equation*}
+f_l =
+\left\{
+\begin{array}{cl}
+1 & \mbox{ chimeric/anomalous orientation,} \\
+0 & \mbox{ $r_p < s_{sr}$, } \\
+1 & \mbox{ $r_p > s_{lr}$, } \\
+\frac{r_p - s_{sr}} {s_{lr} - s_{sr}} & \mbox{otherwise.}
+\end{array}
+\right.
+\end{equation*}
+
+\item \textit{Read alignment gaps} Any gaps indicated in an individual read alignment are translated into SV candidates if the total inserted or deleted sequence length exceeds the minimum indel candidate size. The SV candidate breakend regions are expanded by 20 bases in each direction from the exact breakpoint implied in the read alignment.
+
+\item \textit{Poorly aligned read edges} For each read edge we force any soft-clipped regions to align to the reference and then evaluate for a high density of base mismatches at the read edge. To do so we find $d_{m}$, the minimum distance from the edge where the next 5 positions match the reference. If $d_{m}$ is at least 8 and at least 3/4 of the basecalls in the range [1,$d_{m}$] have quality 20 or greater, we define a complex region candidate, which has one region extending 20 bases [...]
+
+Complex region candidates indicate that there is evidence for small indels or SVs within a region without proposing a specific SV hypothesis. Assembly of the complex region will be performed to refine the candidate into a specific set of indels.
+
+\item \textit{Split-reads} Split-read alignments as indicated using the BAM record ``SA'' tag convention will generate SV candidates as well. For reads which are split into no more than 2 segments, where both segments have a mapping quality of at least 15, the SV candidate implied by the split-read junction is generated. The two SV candidate regions are extended by 20 bases in each direction from the implied junction site.
+
+\end{itemize}
+
+These SV candidates are translated into breakend graph elements by converting SV breakend regions into graph nodes spanning the same region and representing the connection between regions as a directed edge from the region of direct evidence (the locally observed read of a pair), to the region of indirect evidence (the remote read of a pair). Complex region candidates are translated into a single graph node spanning the candidate region with a self-edge added to indicate that there is ev [...]
+
+\paragraph{Merge and de-noising procedure}
+
+In the procedure above, each SV-associated read or read pair is translated into one or more single-edge breakend graphs. Each one of these single-edge breakend graphs is then sequentially merged into the breakend graph for the entire genomic segment. Merging is performed using a procedure which is general to any two breakend graphs. The high-level steps to this procedure are (1) take the union of the two input graphs, (2) find nodes from the graph union which qualify for merging, and [...]
+
+The node merging criteria are described in more detail below. The underlying rationale for this merging process is that each graph node represents a region of the genome and therefore two graph nodes representing sufficiently similar regions of the genome could be regarded as `matching' nodes representing the same region. Similarly two edges connecting two sets of matching nodes could be regarded as `matching' edges sharing an SV evidence association between the same two regions. Note th [...]
+
+\renewcommand{\algorithmicrequire}{\textbf{Input:}}
+\renewcommand{\algorithmicensure}{\textbf{Output:}}
+\newcommand*\Let[2]{\State #1 $\gets$ #2}
+
+
+\begin{algorithm}[h]
+\floatname{algorithm}{Method}
+\caption{Merge breakend graphs}
+\label{method:merge}
+\begin{algorithmic}[1]
+\Require{Two breakend graphs $G_1$ and $G_2$}
+\Ensure{Merged breakend graph $G$}
+
+\Procedure{MergeGraphs}{$G_1,G_2$}
+\State $G \gets G_1 \cup G_2$
+\State \Call{AnnotateMergeableNodes}{$G$}
+\State \Call{MergeNodes}{$G$}
+\EndProcedure
+\end{algorithmic}
+\end{algorithm}
+
+The high level merge procedure is outlined in Method \ref{method:merge}. The two input graphs are merged by taking their union, identifying and annotating nodes in the graph union which are eligible for merging (described in Method \ref{method:annotate}), and executing the eligible node merges (described in Method \ref{method:mergenodes}).
+
+\begin{algorithm}[t!]
+\floatname{algorithm}{Method}
+\caption{Annotate mergeable graph nodes}
+\label{method:annotate}
+\begin{algorithmic}[1]
+\Require{Breakend graph $G$}
+\Ensure{Breakend graph $G$ with annotation of mergeable nodes}
+\Procedure{AnnotateMergeableNodes}{$G$}
+\For {node $v$ in $G$} \label{lst:line:clear}
+\State mergeable[$v$] $\gets$ false
+\EndFor
+\For {node $v$ in $G$}  \label{lst:line:sigstart}
+\For {node $w$ in adjacent[$v$]}
+\If {evidence[$(v,w)$] $\geq e_s$}
+\State mergeable[$v$] $\gets$ true
+\State mergeable[$w$] $\gets$ true   \label{lst:line:sigend}
+\EndIf
+\EndFor
+\EndFor
+\For {node $v$ in $G$} \label{lst:line:noisestart}
+\For {node $w$ in adjacent[$v$]}
+\State $M \gets \{v,w\}$
+\State $e_{\text{merged}} \gets $ evidence[$(v,w)$]
+\For {node $x$ in \Call{IntersectingNodes}{$G$,$w$}}
+\For {node $y$ in adjacent[$x$]}
+\If {$y \in $ \Call{IntersectingNodes}{$G$,$v$}}
+\State $e_{\text{merged}} \gets e_{\text{merged}} +$ evidence[$(x,y)$]
+\State $M \gets M \cup \{x,y\}$
+\EndIf
+\EndFor
+\EndFor
+\If {$e_{\text{merged}} \geq e_s$}
+\For {node $m$ in $M$}
+\State mergeable[$m$] $\gets$ true \label{lst:line:noiseend}
+\EndFor
+\EndIf
+\EndFor
+\EndFor
+\EndProcedure
+\Statex
+
+\Procedure{IntersectingNodes}{$G$,$v$}
+\State $W \gets \{\}$
+\For {node $w$ in $G$, $w \neq v$}
+\If {region[$v$] $\cap$ region[$w$]}
+\State $W \gets W \cup \{w\}$
+\EndIf
+\EndFor
+\State \Return $W$
+\EndProcedure
+
+\end{algorithmic}
+\end{algorithm}
+
+
+The procedure used to identify `mergeable' nodes in a breakend graph is shown in Method \ref{method:annotate}. The first major step (lines \ref{lst:line:sigstart}-\ref{lst:line:sigend}) is to find all nodes incident on an edge with evidence weight already meeting the signal threshold $e_s$. These nodes are annotated as eligible for merging. The second step of the annotation procedure (lines \ref{lst:line:noisestart}-\ref{lst:line:noiseend}) is to find all sets of nodes which, if merged,  [...]
+
+
+\begin{algorithm}[t!]
+\floatname{algorithm}{Method}
+\caption{Merge all graph nodes annotated as mergeable}
+\label{method:mergenodes}
+\begin{algorithmic}[1]
+\Require{Breakend graph $G$ with annotation of mergeable nodes}
+\Ensure{Breakend graph $G$ with eligible nodes merged}
+\Procedure{MergeNodes}{$G$}
+\State $R \gets \{\}$
+\For {node $v$ in $G$, mergeable[$v$] is true} \label{lst:line:findstart}
+\Loop
+\State $W \gets$ \Call{IntersectingNodes}{$G$,$v$}
+\If {$W = \{\}$} break \EndIf
+\For {node $w$ in $W$, mergeable[$w$] is true} \label{lst:line:findend}
+\State region[$v$] $\gets$ region[$v$] $\cup$ region[$w$] \label{lst:line:regionstart}
+\State region[$w$] $\gets \{\}$ \label{lst:line:regionend}
+\For {node $x$ in adjacent[$w$]}
+\State \Call{MergeEdgeEvidence}{$v$,$w$,$x$}
+\EndFor
+\State $R \gets R \cup \{w\}$
+\EndFor
+\EndLoop
+\EndFor
+\State $G \gets G \setminus R$
+\EndProcedure
+\Statex
+
+\Procedure{IntersectingNodes}{$G$,$v$}
+\State $W \gets \{\}$
+\For {node $w$ in $G$, $w \neq v$}
+\If {region[$v$] $\cap$ region[$w$]}
+\State $W \gets W \cup \{w\}$
+\EndIf
+\EndFor
+\State \Return $W$
+\EndProcedure
+\Statex
+
+\Procedure{MergeEdgeEvidence}{$v$,$w$,$x$} \label{lst:line:mee}
+\If    {$w = x$}
+\State evidence[$(v,v)$] $\gets$ evidence[$(v,v)$] + evidence[$(w,w)$]
+\ElsIf {$v = x$}
+\State $e_{\text{max}} \gets$ max(evidence[$(w,v)$],evidence[$(v,w)$])
+\State evidence[$(v,v)$] $\gets$ evidence[$(v,v)$] + $e_{\text{max}}$
+\Else
+\State evidence[$(v,x)$] $\gets$ evidence[$(v,x)$] + evidence[$(w,x)$]
+\State evidence[$(x,v)$] $\gets$ evidence[$(x,v)$] + evidence[$(x,w)$]
+\EndIf
+\EndProcedure
+\end{algorithmic}
+\end{algorithm}
+
+Following annotation of nodes for merge eligibility, the procedure to execute merging for eligible nodes is applied as described in Method \ref{method:mergenodes}. For two nodes to merge, both must be annotated as eligible and their genomic region annotation must intersect. In Method \ref{method:mergenodes}, lines \ref{lst:line:findstart}-\ref{lst:line:findend} describe the procedure to produce such qualifying node pairs $v$ and $w$. To merge node $w$ into $v$, first the union of the reg [...]
+
+In practice, the evidence weight signal threshold $e_s$ defaults to 9 and the evidence weight applied for a non-penalized split or paired read observation (as described in the previous section) is 3. Thus the above described breakend graph merging procedure effectively requires 3 split or paired read observations supporting the same region association before merging this evidence in the breakend graph.
+
+The breakend graph merge procedure can be limited under conditions of high graph complexity. If the search for all nodes intersecting a candidate merge node (as described in the IntersectingNodes procedure in Method \ref{method:annotate}) results in 500 or more nodes or greater than 0.5 nodes per base in the search region, the merge of the associated candidate edge is stopped. This step is designed to limit graph complexity in repetitive and pericentromeric regions.
+
+As the target genome segment is scanned, the region breakend graph is periodically de-noised. The procedure is only performed for edges where the input alignment files have been scanned over the genomic regions of the nodes incident on the edge (excluding small buffer zones at the edges of the scanned genome segments). For any edge $e = (v,w)$ which is eligible for de-noising, if both evidence[$(v,w)$] and evidence[$(w,v)$] are less than $e_s$, the edge is removed from the graph, togethe [...]
+
+\subsubsection{Global breakend graph construction}
+
+Using the graph merging and de-noising procedures described in the previous section, it is straightforward to create the global breakend graph from the set of genome segment graphs. To do so the genome segment breakend graphs are sequentially merged into the global breakend graph using the breakend graph merge procedure described in the previous section. This merging step is followed by a final de-noising step which is the same as that described for genome segments above, except that all [...]
+
+\subsection{Manta Workflow Phase Two: Hypothesis generation and scoring}
+
+After construction of the global breakend graph, each set of independent breakend-associations is distributed among a group of SV discovery workers running in parallel. Each worker handles SV hypothesis generation, scoring and reporting for its input breakend associations. At present an independent breakend association is approximated as a single breakend graph edge, however Manta's design enables larger highly-connected subgraphs to be distributed to SV discovery workers in the future t [...]
+
+\subsubsection{SV hypothesis generation}
+
+Hypothesis generation currently proceeds from a single breakend graph edge. The noise filtration criteria for hypothesis generation are stricter than those used in the graph de-noising step, so that any edge $e = (v,w)$ where either evidence[$(v,w)$] or evidence[$(w,v)$] is less than $e_s$ is filtered.
+
+For any non-filtered edge, the first step of hypothesis generation is to find all reads potentially associated with SVs which span the node(s) connected by the input graph edge. For this purpose, reads are searched over the evidence range recorded for each graph node. The evidence range is a superset of the node's breakend-associated range describing the region containing mapped reads which contribute breakend evidence for the node. These regions are scanned for read evidence using the s [...]
+
+After gathering SV-associated reads from the targeted regions, each read or read pair is translated into a set of SV candidates using the same logic described for graph construction above. Any SV candidate which does not intersect the targeted graph edge is discarded. Intersecting the graph edge is defined as having one breakend intersect each of the two genomic regions connected by the edge. The SV candidates remaining after this filtration process are merged into a combined SV candidat [...]
+
+\subsubsection{SV hypothesis refinement}
+
+Initial SV hypothesis generation translates each graph edge into a set of low-resolution SV candidates, where breakends are associated with an imprecise region. In the refinement step, each low-resolution SV candidate is translated into a set of basepair-resolution SV candidates via local read assembly. If the assembly procedure fails to produce at least one basepair-resolution candidate, then the low-resolution candidate can still be scored and reported in the variant caller output as a [...]
+
+Hypothesis refinement is described below in two alternate forms: standard SV assembly describes the case of two remote breakend regions associated with a specific SV candidate; complex region assembly describes the case of a single breakend region where small indels or SVs are likely to exist, but no specific variant hypothesis exists.
+
+\paragraph{Contig assembly}
+
+The contig assembly procedure gathers SV-associated reads from the candidate region or regions and assembles these reads into contigs intended to represent variant haplotypes.
+
+As a first step, expanded breakend regions are defined, which will be scanned for assembly evidence and used for aligning any assembled contigs back to the reference. These regions are an expansion of the breakend regions of the low-resolution input SV candidate. Regions are expanded by 250 bases for conventional SV candidates and 700 bases for complex region candidates.
+
+Candidate assembly reads are gathered from all expanded breakend regions. The input reads are assembled without considering their reverse complement sequences, because their orientation in the alternate contig can be inferred from either the mapped portion of the read or a mapped mate pair. In cases where the orientation of the two breakends is not the same (such as for an inversion), input read orientation is standardized to be consistent with the first breakend. Reads are selected as i [...]
+
+The selected reads are assembled using a simple algorithm implicitly based on the popular de Bruijn graph approach originated by Pevzner \citep{pevzner2001}. We note here that the orientation of each unmapped assembly read is determined by the orientation of its mapped partner, avoiding the need to deduce this during the assembly. Assembly is attempted over a series of word sizes, starting from a minimum value which is increased until repeat filtration criteria are met or the maximum wor [...]
+
+For a given word size $k$, a list is made of all $k$-mers present in the input reads. The most frequent $k$-mer is chosen as a seed for the resulting contig assembly if it is observed in at least 3 reads. The contig is extended from this seed in each direction by choosing $k$-mers from this list having a $k-1$ overlap with the contig end. To avoid haplotype switching, reads that support the contig are tracked at each extension point. When there are multiple potential extensions, a $k$-me [...]
+
+\paragraph{Contig alignment for large SVs} For large SV candidates spanning two distinct regions of the genome, the reference sequences are extracted from the two expected breakend regions, and the order and/or orientation of the references is adjusted such that if the candidate SV exists, the left-most segment of the SV contig should align to the first transformed reference region and the right-most contig segment should align to the second reference region. The contig is aligned across [...]
+
+\begin{figure}[!tpb]
+\centerline{
+  \scalebox{0.48}{
+    \includegraphics{figures/jumpstate.eps}
+  }
+}
+\caption{State transitions for Manta's structural variant contig aligner. The alignment scheme uses conventional Smith-Waterman-Gotoh style affine gap alignment for the reference region in the vicinity of each breakend (possibly after strand reversal), with an additional `jump' state which provides a transition between the two breakend alignment regions. Note that a direct transition from the jump state to an insertion is allowed to enable alignment of novel sequence insertions at the br [...]
+\label{fig:jumpstate}
+\end{figure}
+
+The alignment scores used for each reference segment are (2,-8,-12,-1) for match, mismatch, gap open and gap extend. Switching between insertion and deletion states is allowed at no cost. Scores to transition into and extend the `jump' state are -24 and 0, respectively. The jump state is entered from any point in reference segment 1 and exits to any point in reference segment 2. The alignments resulting from this method are only used when a transition through the jump state occurs. In ad [...]
+
+
+\paragraph{Contig alignment for complex region candidates}
+Complex regions are segments of the genome targeted for assembly without a specific variant hypothesis. For this reason the problem of aligning contigs for these regions is somewhat more difficult than for specific large SV candidates, because a wide range of variant sizes are possible. This is reflected in the alignment procedure for complex region contigs, which are checked against two aligners optimized for large and small indels respectively.
+
+A contig is first aligned with the large indel aligner and only checked for small indels if no large indels are found. The structure of the large indel aligner is a variant on a standard affine-gap scheme, in which a second pair of delete and insert states is added for large indels. Alignment scores for standard alignment states are (2, -8, -18, -1) for match, mismatch, gap open, and gap extend. Open and extend scores for `large' gaps are -24 and 0. Transitions are allowed between stand [...]
+
+If the large indel aligner fails to identify a candidate meeting the size and quality criteria above, the contig is used to search for smaller indels, this time using a conventional affine gap aligner with parameters: (2,-8,-12,0) for match, mismatch, gap open, gap extend. All indels larger than the minimum indel size are identified. For each indel, the flanking contig alignment quality and uniqueness checks described above are applied to filter likely false positives, and any remaining  [...]
+
+\paragraph{Large Insertions}
+
+Fully assembled large insertions will be detected by the standard contig assembly and alignment pipeline described above. Additional logic is added to detect and report the signature of a large insertion represented by two contig alignments which are consistent with left and right breakends of a large insertion. Under this scheme, all contig alignments are checked for the signature of a high quality alignment for only a left or right subsegment of the contig. If two contigs form a left [...]
+
+\paragraph{Post-alignment}
+Following alignment of either complex region or standard SV candidates, the homology length and inserted sequence at the breakend junction are extracted from the alignment so that these can be included in the candidate scoring and reporting stages.
+
+The total set of refined candidates (or imprecise candidates in the case of assembly failure) is reported to a `candidate' VCF file. This file does not include scoring or quality filtration information but provides partial support for applications which are not represented by Manta's current scoring models (i.e. tumor without matched normal), as a method development aid, or as input to a small variant caller or another SV scoring/genotyping method. As an example of the latter case, in t [...]
+
+\subsubsection{Scoring}
+
+Following initial SV hypothesis generation and refinement via assembly, all candidate variants are scored. In this phase, multiple scoring models can be applied to the candidate variants. Manta currently applies a diploid scoring model for individual and control samples, as well as a somatic scoring model when a tumor and matched normal sample pair are given.
+
+\paragraph{Diploid scoring model}
+
+The diploid scoring model produces diploid genotype probabilities for each candidate structural variant. Candidates are approximated as independent for scoring purposes; therefore we apply a simple model with a single alternate allele. That is, for reference and alternate alleles $A = \{r,x\}$, the genotype states at each allele are restricted to $G = \{rr, rx, xx\}$. We solve for the posterior probability over $G$ using
+
+\begin{equation*}
+P( G \vert D ) \propto P( D \vert G )  P (G)
+\end{equation*}
+
+\noindent
+where $D$ are all supporting read fragments for either allele. The prior $P(G)$ is
+
+\newcommand{\thz}{\theta_{\textnormal{SV}}}
+
+\begin{equation*}
+P ( G ) =
+\left\{
+\begin{array}{rl}
+\thz      & \mbox{ if $rx$} \\
+\thz / 2  & \mbox{ if $xx$} \\
+1 - 3 \thz / 2  & \mbox{ if $rr$}
+\end{array}
+\right.
+\end{equation*}
+
+\noindent
+where the SV heterozygosity is $\thz = 1\e{-5}$.
+
+The likelihood $P(D \vert G)$ is computed assuming that each read fragment $d \in D$ represents an independent observation of the sample
+
+\begin{equation*}
+P(D \vert G) = \prod_{d \in D} P(d \vert G)
+\end{equation*}
+
+\noindent
+where the fragment likelihood is
+
+\begin{equation*}
+P(d \vert G) = \sum_{a \in A} P(d \vert a) P(a|G)
+\end{equation*}
+
+The likelihood for each fragment to support a given allele, $P(d \vert a)$, is common to both the diploid and somatic scoring models and is detailed below.
+
+
+\paragraph{Somatic scoring model}
+
+The somatic scoring model expresses the probability that the candidate variant is somatic, i.e. that it exists in the tumor but not in a matched normal or other type of control. The scoring model used in Manta is a simplification of the model used in the Strelka small variant caller \cite{strelka2012}. In this simplified form a `somatic genotype' state space is defined consisting of non-somatic germline variant states $\{rr, rx, xx\}$, a noise state $n$ representing spurious observations at  [...]
+
+\begin{equation*}
+P( S \vert D ) \propto P( D \vert S )  P (S)
+\end{equation*}
+
+\noindent
+where $D$ are all supporting read fragments for either allele. The prior $P(S)$ is
+
+\begin{equation*}
+P ( S ) =
+\left\{
+\begin{array}{rl}
+P ( s )  & \mbox{ if $s$} \\
+P ( n , x )  & \mbox{ if $n$} \\
+\thz      & \mbox{ if $rx$} \\
+\thz / 2  & \mbox{ if $xx$} \\
+1 - 3 \thz / 2 - P(s) - P(n,x)  & \mbox{ if $rr$}
+\end{array}
+\right.
+\end{equation*}
+
+\noindent
+where the somatic variant prior is $P(s) = 1\e{-7}$, the germline SV heterozygosity is $\thz = 1\e{-5}$, and the noise prior $P(n,x)$ is a function of the alternate allele size, set to $1\e{-10}$ for large events and $1\e{-9}$ for small events with a linear transition between large and small events from 10000 to 5000 bases in size.
+
+The likelihood above is computed from independent sample-specific likelihoods, $P( D \vert S ) = P( D_t \vert S )P( D_n \vert S )$, where $D_t$ and $D_n$ indicate tumor and normal sample data. Each somatic genotype implies a variant allele frequency in the normal and tumor samples $f_n,f_t$ as follows
+
+\begin{equation*}
+f_n, f_t =
+\left\{
+\begin{array}{rl}
+0, \hat{f_t} & \mbox{ if $s$} \\
+\hat{f}, \hat{f} & \mbox{ if $n$} \\
+0.5, 0.5 & \mbox{ if $rx$} \\
+1, 1 & \mbox{ if $xx$} \\
+0, 0 & \mbox{ if $rr$}
+\end{array}
+\right.
+\end{equation*}
+
+The model assumes each read fragment $d \in D$ contributes somatic genotype evidence independently
+
+\begin{equation*}
+P(D \vert S) = \prod_{d \in D} P(d \vert S)
+\end{equation*}
+
+\noindent
+with fragment likelihood
+
+\begin{equation*}
+P(d \vert S) = \sum_{a \in A} P(d \vert a) P(a | S)
+\end{equation*}
+
+\noindent
+where $P(a|S)$ is derived from the expected tumor and normal allele frequencies as discussed above. The likelihood for each fragment to support a given allele $P(d \vert a)$ is shared with the germline model and detailed further below.
+
+\paragraph{Somatic calling tiers}
+
+An additional feature of the somatic scoring model is that it uses two calling tiers to reduce false positives. Tier 1 uses relatively stringent noise filtration parameters, while Tier 2 is more permissive. All calls are initially made using Tier 1 settings, after which the variant is called again using Tier 2. Manta reports the minimum of the two somatic call qualities $Q = \min(Q_{\text{Tier 1}},Q_{\text{Tier 2}})$ as the final somatic quality score. The parameters used for each ti [...]
+
+\paragraph{Allele likelihood computation}
+
+For any read fragment $d$ which interacts with one of the variant allele breakends, the likelihood $P(d \vert a)$ is found for the reference and alternate allele $a$. As described above, these values are shared by diploid and somatic quality scoring schemes for each variant. The read fragment likelihood combines both paired-read and split-read evidence, approximating their contributions as independent:
+
+\begin{equation*}
+P(d \vert a) = P ( \text{len}(d,a) \vert a) P( r_1(d) \vert a) P ( r_2(d) \vert a)
+\end{equation*}
+
+Here $\text{len}(d,a)$ is the fragment length estimated in the context of allele $a$, and $r_1(d),r_2(d)$ are the sequenced reads from the fragment which can be used as split-read evidence when these sequences cross the variant breakend with sufficient context. Note that each sequence fragment may contribute only paired-read support, only split-read support, or both.
+
+For a sequence fragment to contribute to the paired-read component of the likelihood, the fragment must overlap the breakend of at least one allele such that the breakend is spanned by at least 50 bases on both sides. For deletions smaller than 500 bases, the weight of paired read evidence is reduced to zero on a linear ramp from size 500 to 300. Read pairs are only used when both reads have a mapping quality of at least 15, except in the Tier 2 evaluation of the somatic model, in which  [...]
+
+For the term $P ( \text{len}(d,a) \vert a)$, the probability of the observed or a more extreme fragment length is used. The chance of a spurious chimera observation $P(c \vert \neg a)$, given that the sample supports an `other' allele $\neg a$, is also accounted for:
+
+\begin{equation*}
+P ( \text{len}(d,a) \vert a) = P ( \text{len}(d,a) ) (1-P(c \vert a))  + P (c \vert \neg a)
+\end{equation*}
+
+In the diploid model the chimera probabilities are the same for both alleles, $P(c \vert a) =  1\e{-3}$. In the somatic model these are $P(c \vert a) =  1\e{-4}$ by default, but for Tier 2 analysis the alternate allele chimera probability is set to $P(c \vert x) =  5\e{-6}$ for the normal sample only.
+
+For the split-read computation, each read is realigned across both breakends of the reference and variant alleles. Any read which crosses the breakend with at least 16 bases on both sides, has at least 75\% matches on each side, and has at least 90\% matches overall is classified as `supporting' a breakend, and thus is allowed to contribute to the split-read evidence. The likelihood of the read for each of the two alleles, assuming the read is correctly mapped to the locus $m$, is
+
+\begin{equation*}
+P (r \vert a,m) = \prod_{b_r \in r} P(b_r \vert b_a)
+\end{equation*}
+
+\noindent
+where $P(b_r \vert b_a)$ is the probability of observing basecall $b_r$ given the corresponding base $b_a$ of the evaluated allele. Using the basecall quality error estimate $e$ this is
+
+\begin{equation*}
+P(b_r \vert b_a) =
+\left\{
+\begin{array}{rl}
+1-e & \mbox{ if $b_r=b_a$,} \\
+e/3 & \mbox{ otherwise.}
+\end{array}
+\right.
+\end{equation*}
+
+The probability of a spurious read mapping, $P(\neg m \vert \neg a)$, given that the sample actually supports an `other' allele type $\neg a$ at this locus, is also accounted for:
+
+\begin{equation*}
+P ( r \vert a) = P ( r \vert a,m ) (P(m \vert a))  + P (\neg m \vert \neg a)
+\end{equation*}
+
+In theory, the mapping qualities for each read should be used to set the spurious mapping values, however in Manta's current implementation constant values are used to approximate read mapping errors.  In all models the reference allele erroneous mapping probability is $P(\neg m \vert r) =  1\e{-6}$. In the diploid model the alternate allele erroneous mapping probability is $P(\neg m \vert x) =  1\e{-4}$. In the somatic model this value is the same except for the Tier 2 evaluation, in wh [...]
+
+\paragraph{Variant Filters}
+
+Filters are applied in a final step to improve precision of the scored output. These filters include the minimum quality scores appropriate for each scoring model and additional terms which correlate with error modes not represented in the scoring models.
+
+The current filters are:
+
+\begin{itemize}
+
+\item \textit{High read depth} To remove calls in pericentromeric and other regions with collapsed reference representation, calls with very high depth relative to the mean depth of the chromosome are filtered out. Note that for somatic calling, only the depth of the normal sample is used to test for filtration.
+
+The depth associated with the variant call is found from searching within 50 bases of each breakend region's center position. The position with the highest depth in the normal sample within these regions is treated as the variant depth. If the variant depth exceeds 3 times the average chromosome depth then the variant is filtered.
+
+Note that Manta has special analysis modes for exome/targeted and RNA-Seq analysis, in which this filter is not used.
+
+\item \textit{High MAPQ0 fraction} For any variant smaller than 1000 bases, an additional filter is applied to calls with too many reads with a mapping quality of 0 (MAPQ0) in the normal sample. As with the depth, the MAPQ0 fraction associated with the variant call is found by searching within 50 bases of each breakend region's center position in the normal sample. If the percentage of MAPQ0 reads from either breakend exceeds 40\%, then the variant is filtered.
+
+\item \textit{Large events with no paired read support} For the diploid model only, non-insertion calls larger than the 95th percentile of the fragment length distribution are filtered out if no read pairs are found which are significantly more likely (Q30+) under the alternate allele compared to the reference allele.
+
+\item \textit{Low quality scores} For diploid scoring any variants with genotype quality less than 20 are marked as filtered. For somatic scoring any variants with somatic quality less than 30 are marked as filtered.
+
+\end{itemize}
+
+
+\bibliographystyle{alpha}
+\bibliography{methods}
+
+\end{document}
diff --git a/docs/methods/primary/packages/algorithm.sty b/docs/methods/primary/packages/algorithm.sty
new file mode 100644
index 0000000..8558565
--- /dev/null
+++ b/docs/methods/primary/packages/algorithm.sty
@@ -0,0 +1,100 @@
+%%
+%% This is file `algorithm.sty',
+%% generated with the docstrip utility.
+%%
+%% The original source files were:
+%%
+%% algorithms.dtx  (with options: `algorithm')
+%% This is a generated file.
+%% 
+%% Copyright (C) 1994-2004   Peter Williams <pwil3058 at bigpond.net.au>
+%% Copyright (C) 2005-2009   Rogério Brito <rbrito at ime.usp.br>
+%% 
+%% This document file is free software; you can redistribute it and/or
+%% modify it under the terms of the GNU Lesser General Public License as
+%% published by the Free Software Foundation; either version 2 of the
+%% License, or (at your option) any later version.
+%% 
+%% This document file is distributed in the hope that it will be useful, but
+%% WITHOUT ANY WARRANTY; without even the implied warranty of
+%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+%% General Public License for more details.
+%% 
+%% You should have received a copy of the GNU Lesser General Public License
+%% along with this document file; if not, write to the Free Software
+%% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+%% USA.
+%% 
+\NeedsTeXFormat{LaTeX2e}[1999/12/01]
+\ProvidesPackage{algorithm}
+   [2009/08/24 v0.1 Document Style `algorithm' - floating environment]
+\RequirePackage{float}
+\RequirePackage{ifthen}
+\newcommand{\ALG at within}{nothing}
+\newboolean{ALG at within}
+\setboolean{ALG at within}{false}
+\newcommand{\ALG at floatstyle}{ruled}
+\newcommand{\ALG at name}{Algorithm}
+\newcommand{\listalgorithmname}{List of \ALG at name s}
+% Declare Options:
+% * first: appearance
+\DeclareOption{plain}{
+  \renewcommand{\ALG at floatstyle}{plain}
+}
+\DeclareOption{ruled}{
+  \renewcommand{\ALG at floatstyle}{ruled}
+}
+\DeclareOption{boxed}{
+  \renewcommand{\ALG at floatstyle}{boxed}
+}
+% * then: numbering convention
+\DeclareOption{part}{
+  \renewcommand{\ALG at within}{part}
+  \setboolean{ALG at within}{true}
+}
+\DeclareOption{chapter}{
+  \renewcommand{\ALG at within}{chapter}
+  \setboolean{ALG at within}{true}
+}
+\DeclareOption{section}{
+  \renewcommand{\ALG at within}{section}
+  \setboolean{ALG at within}{true}
+}
+\DeclareOption{subsection}{
+  \renewcommand{\ALG at within}{subsection}
+  \setboolean{ALG at within}{true}
+}
+\DeclareOption{subsubsection}{
+  \renewcommand{\ALG at within}{subsubsection}
+  \setboolean{ALG at within}{true}
+}
+\DeclareOption{nothing}{
+  \renewcommand{\ALG at within}{nothing}
+  \setboolean{ALG at within}{true}
+}
+\DeclareOption*{\edef\ALG at name{\CurrentOption}}
+% ALGORITHM
+%
+\ProcessOptions
+\floatstyle{\ALG at floatstyle}
+\ifthenelse{\boolean{ALG at within}}{
+  \ifthenelse{\equal{\ALG at within}{part}}
+     {\newfloat{algorithm}{htbp}{loa}[part]}{}
+  \ifthenelse{\equal{\ALG at within}{chapter}}
+     {\newfloat{algorithm}{htbp}{loa}[chapter]}{}
+  \ifthenelse{\equal{\ALG at within}{section}}
+     {\newfloat{algorithm}{htbp}{loa}[section]}{}
+  \ifthenelse{\equal{\ALG at within}{subsection}}
+     {\newfloat{algorithm}{htbp}{loa}[subsection]}{}
+  \ifthenelse{\equal{\ALG at within}{subsubsection}}
+     {\newfloat{algorithm}{htbp}{loa}[subsubsection]}{}
+  \ifthenelse{\equal{\ALG at within}{nothing}}
+     {\newfloat{algorithm}{htbp}{loa}}{}
+}{
+  \newfloat{algorithm}{htbp}{loa}
+}
+\floatname{algorithm}{\ALG at name}
+\newcommand{\listofalgorithms}{\listof{algorithm}{\listalgorithmname}}
+\endinput
+%%
+%% End of file `algorithm.sty'.
diff --git a/docs/methods/primary/packages/algorithmicx.sty b/docs/methods/primary/packages/algorithmicx.sty
new file mode 100644
index 0000000..bfb7dab
--- /dev/null
+++ b/docs/methods/primary/packages/algorithmicx.sty
@@ -0,0 +1,786 @@
+% ALGORITHMIC STYLE -- Released 27 APR 2005
+%    for LaTeX version 2e
+%
+% Copyright Szasz Janos
+% E-mail szaszjanos at users.sourceforge.net
+%
+%
+%      ***      INITIALISING      ***
+%
+%
+\NeedsTeXFormat{LaTeX2e}
+\ProvidesPackage{algorithmicx}[2005/04/27 v1.2 Algorithmicx]
+\RequirePackage{ifthen}
+\typeout{Document Style algorithmicx 1.2 - a greatly improved `algorithmic' style}
+%
+\newcounter{ALG at line}
+\newcounter{ALG at rem}
+\newcounter{ALG at nested}
+\newlength{\ALG at tlm}
+\newlength{\ALG at thistlm}
+\newcounter{ALG at Lnr}% the number of defined languages
+\setcounter{ALG at Lnr}{0}
+\newcounter{ALG at blocknr}% the number of defined blocks
+\setcounter{ALG at blocknr}{0}
+\newcounter{ALG at storecount}% number of stored but not restored algorithmic environments
+\setcounter{ALG at storecount}{0}
+\newcounter{ALG at tmpcounter}% only to decrement things
+\newlength\ALG at tmplength%
+%\def\algorithmicnoindent{-\ALG at tlm}
+%       \def\algbackskipbegin{\hskip\ALG at ctlm}
+%\def\algbackskip{\hskip-\ALG at thistlm}
+%\def\algbackskipend{\hskip-\ALG at tlm}
+\def\ALG at defaultindent{\algorithmicindent}
+%
+% conditional states
+%
+\def\ALG at newcondstate#1%
+   {%
+   \expandafter\edef\csname ALG at x@#1\endcsname%
+      {\expandafter\noexpand\csname @@ALG at x@#1\endcsname}%
+   }%
+\ALG at newcondstate{notext}%
+\ALG at newcondstate{default}%
+%
+%
+%      ***      ALGORITHMIC      ***
+%
+%
+\newcommand\ALG at beginblock[1]% #1 - indentation
+   {%
+      \ALG at thistlm\ALG at tlm%
+      \addtolength\ALG at tlm{#1}%
+      \addtocounter{ALG at nested}{1}%
+      \setlength\ALG at tmplength{#1}%
+      \expandafter\edef\csname ALG at ind@\theALG at nested\endcsname{\the\ALG at tmplength}%
+   }%
+\newcommand\ALG at endblock%
+   {%
+      \addtolength\ALG at tlm{-\csname ALG at ind@\theALG at nested\endcsname}%
+      \addtocounter{ALG at nested}{-1}%
+      \ALG at thistlm\ALG at tlm%
+   }%
+%
+%   algorithmic environment
+%
+\def\ALG at step%
+   {%
+   \addtocounter{ALG at line}{1}%
+   \addtocounter{ALG at rem}{1}%
+   \ifthenelse{\equal{\arabic{ALG at rem}}{\ALG at numberfreq}}%
+      {\setcounter{ALG at rem}{0}\alglinenumber{\arabic{ALG at line}}}%
+      {}%
+   }%
+\newenvironment{algorithmic}[1][0]%
+   {%
+   \edef\ALG at numberfreq{#1}%
+   \def\@currentlabel{\theALG at line}%
+   %
+   \setcounter{ALG at line}{0}%
+   \setcounter{ALG at rem}{0}%
+   %
+   \let\\\algbreak%
+   %
+   \expandafter\edef\csname ALG at currentblock@\theALG at nested\endcsname{0}%
+   \expandafter\let\csname ALG at currentlifetime@\theALG at nested\endcsname\relax%
+   %
+   \begin{list}%
+      {\ALG at step}%
+      {%
+      \rightmargin\z@%
+      \itemsep\z@ \itemindent\z@ \listparindent2em%
+      \partopsep\z@ \parskip\z@ \parsep\z@%
+      \labelsep 0.5em \topsep 0.2em%\skip 1.2em 
+      \ifthenelse{\equal{#1}{0}}%
+         {\labelwidth 0.5em}%
+         {\labelwidth 1.2em}%
+      \leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}% Ok. the perfect leftmargin :-))
+      \ALG at tlm\z@%
+      }%
+   \setcounter{ALG at nested}{0}%
+   \ALG at beginalgorithmic%
+   }%
+   {% end{algorithmic}
+   % check if all blocks are closed
+   \ALG at closeloops%
+   \expandafter\ifnum\csname ALG at currentblock@\theALG at nested\endcsname=0\relax%
+   \else%
+      \PackageError{algorithmicx}{Some blocks are not closed!!!}{}%
+   \fi%
+   \ALG at endalgorithmic%
+   \end{list}%
+   }%
+%
+%
+%   ***   Functional core   ***
+%
+%
+\def\ALG at makeentity#1% execute the entity (#1)
+   {%
+   \def\ALG at thisentity{#1}%
+   \expandafter\ifx\csname ALG at b@\ALG at L @#1 at 0\endcsname\relax%
+      \let\ALG at makenobeginrepeat\ALG at makenobegin\ALG at makenobeginrepeat% this entitie ends or continues blocks
+   \else%
+      \let\ALG at makebeginrepeat\ALG at makebegin\ALG at makebeginrepeat% this entitie can open blocks
+   \fi%
+   \ALG at entitiecommand%
+   }%
+%
+\def\ALG at makebegin% executes an entitie that can open blocks
+   {%
+   \expandafter\let\expandafter\ALG at thislifetime\csname ALG at currentlifetime@\theALG at nested\endcsname%
+   \ifx\ALG at thislifetime\relax%
+      \let\ALG at makebeginrepeat\ALG at doentity% in infinite block I can open my block
+   \else%
+      \ifnum\ALG at thislifetime>0\relax%
+         \ifnum\ALG at thislifetime>65534\else%
+            \setcounter{ALG at tmpcounter}{\ALG at thislifetime}% the block has 'space' for another included block
+            \addtocounter{ALG at tmpcounter}{-1}%
+            \expandafter\edef\csname ALG at currentlifetime@\theALG at nested\endcsname{\arabic{ALG at tmpcounter}}%
+         \fi%
+         \let\ALG at makebeginrepeat\ALG at doentity%
+      \else% the block needs to be closed
+         \expandafter\ifx\csname ALG at b@\ALG at L @\ALG at thisentity @\csname ALG at currentblock@\theALG at nested\endcsname\endcsname\relax%
+            \ALG at closebyforce% I can not close this block, continue after it is closed by force
+%            \ALG at makebegin%
+         \else%
+            % the block would be closed automatically, but this entitie can close it, so let's do it with the entity
+            \let\ALG at makebeginrepeat\ALG at doentity%
+         \fi%
+      \fi%
+   \fi%
+   \ALG at makebeginrepeat%
+   }%
+%
+\def\ALG at makenobegin% executes an entitie that can not open blocks
+   {%
+   \expandafter\ifx\csname ALG at currentlifetime@\theALG at nested\endcsname\relax%
+      \let\ALG at makenobeginrepeat\ALG at doentity% an infinite block must be broken
+   \else%
+      \expandafter\ifx\csname ALG at b@\ALG at L @\ALG at thisentity @\csname ALG at currentblock@\theALG at nested\endcsname\endcsname\relax%
+         \ALG at closebyforce% the block must be ended by force,
+      \else%
+         \let\ALG at makenobeginrepeat\ALG at doentity% I can continue / end this block, let's do it
+      \fi%
+   \fi%
+   \ALG at makenobeginrepeat%
+   }%
+%
+\def\ALG at dobegin%
+   {%
+   \ALG at beginblock{\csname ALG at i@\ALG at L @\ALG at thisentity @\ALG at thisblock\endcsname}%
+   \expandafter\edef\csname ALG at currentblock@\theALG at nested\endcsname{\csname ALG at b@\ALG at L @\ALG at thisentity @\ALG at thisblock\endcsname}%
+   \expandafter\ifx\csname ALG at c@\ALG at L @\ALG at thisentity @\ALG at thisblock\endcsname\relax%
+      \expandafter\let\csname ALG at currentlifetime@\theALG at nested\endcsname\relax%
+   \else%
+      \expandafter\edef\csname ALG at currentlifetime@\theALG at nested\endcsname{\csname ALG at c@\ALG at L @\ALG at thisentity @\ALG at thisblock\endcsname}%
+   \fi%
+   }%
+%
+\def\ALG at doend%
+   {%
+   \ALG at endblock%
+   }%
+%
+\def\ALG at doentity% the number of the closed block, the entitie
+   {%
+   \edef\ALG at thisblock{\csname ALG at currentblock@\theALG at nested\endcsname}%
+   \expandafter\ifx\csname ALG at b@\ALG at L @\ALG at thisentity @\ALG at thisblock\endcsname\relax%
+      \def\ALG at thisblock{0}%
+   \fi%
+   \ALG at getentitytext%
+   \ifnum\ALG at thisblock=0\else\ALG at doend\fi%
+   \ifx\ALG at text\ALG at x@notext%
+      \item[]\nointerlineskip%\vskip-\prevdepth\nointerlineskip% bug: if there are no text and no lines, then this is wrong
+   \else%
+      \item%
+   \fi%
+   \noindent\hskip\ALG at tlm%
+   \expandafter\ifnum0=\csname ALG at b@\ALG at L @\ALG at thisentity @\ALG at thisblock\endcsname\else%
+      \ALG at dobegin%
+   \fi%
+   \def\ALG at entitiecommand{\ALG at displayentity}%
+   }%
+%
+\def\ALG at getentitytext%
+   {%
+   \expandafter\let\expandafter\ALG at text\csname ALG at t@\ALG at L @\ALG at thisentity @\ALG at thisblock\endcsname%
+   \ifx\ALG at text\ALG at x@default%
+      % block specific - default
+      \expandafter\let\expandafter\ALG at text\csname ALG at t@\ALG at L @\ALG at thisentity\endcsname%
+      \ifx\ALG at text\ALG at x@default%
+         % block specific - default, language specific - default
+         \def\ALG at text{\ALG at deftext{\ALG at thisentity}}%
+      \fi%
+   \fi%
+   }%
+%
+\def\ALG at deftext{\csname ALG at deftext@\ALG at L\endcsname}%
+%
+\def\ALG at displayentity%
+   {%
+   \ifx\ALG at text\ALG at x@notext%
+      \let\ALG at text\relax%
+   \fi
+   \ALG at text%
+   }%
+%
+\def\ALG at closebyforce%
+   {%
+   \ALG at endblock%
+   }%
+%
+\def\ALG at closeloops% closes all finite blocks
+   {%
+   \expandafter\ifx\csname ALG at currentlifetime@\theALG at nested\endcsname\relax%
+   \else% only if it is finite
+      \ALG at closebyforce% the block must be ended by force,
+      \ALG at closeloops% the command still runs
+   \fi%
+   }%
+%
+%
+%   ***   Low level block/entitie defining commands   ***
+%
+%
+\def\ALG at bl@{0}% the BIG block
+\let\ALG at bl@@\ALG at bl@% the BIG block
+%
+%  Create a block
+%
+\def\ALG at createblock#1% create the block #1, if it does not exists
+   {%
+   \@ifundefined{ALG at bl@\ALG at Ld @#1}% needs to be created?
+      {%
+      \addtocounter{ALG at blocknr}{1}% increment the block counter
+      \expandafter\edef\csname ALG at bl@\ALG at Ld @#1\endcsname{\arabic{ALG at blocknr}}% set the block number
+      }%
+      {}%
+   }%
+%
+%  Get the block number
+%
+\def\ALG at getblocknumber#1{\csname ALG at bl@\ALG at Ld @#1\endcsname}%
+%
+%  Create an entitie
+%
+\def\ALG at createentitie#1% create the entitie #1, if it does not exists
+   {%
+   \expandafter\ALG at edefcmd\csname #1\endcsname{\noexpand\ALG at makeentity{#1}}%
+   \@ifundefined{ALG at t@\ALG at Ld @#1}% the entity text is defined in this language?
+      {%
+      \expandafter\let\csname ALG at t@\ALG at Ld @#1\endcsname\ALG at x@default%
+      }%
+      {}%
+   }%
+%
+\def\ALG at createtext#1#2% #1 = closed block; #2 = entitie; creates \ALG at t@#2@#1
+   {%
+   \expandafter\let\csname ALG at t@\ALG at Ld @#2@#1\endcsname\ALG at x@default%
+   }%
+%
+%  End and Continue block
+%
+\def\ALG at endandcontinueblock#1#2#3#4#5% #1 = new block; #2 = old block; #3 = entitie; #4 = credits; #5 = indent
+   {%
+   \ifthenelse{\equal{#3}{}}{}% execute only if the entity is not empty
+      {%
+      \ALG at createentitie{#3}% create the entitie
+      \ALG at createblock{#2}% create the old block, if needed
+      \ifthenelse{\equal{#1}{}}% whe need to open a new block?
+         {\expandafter\edef\csname ALG at b@\ALG at Ld @#3@\ALG at getblocknumber{#2}\endcsname{0}}% no, just close the old one
+         {% yes,
+         \ALG at createblock{#1}% create the block
+         \expandafter\edef\csname ALG at b@\ALG at Ld @#3@\ALG at getblocknumber{#2}\endcsname{\ALG at getblocknumber{#1}}% ending the old block opens a new one
+         \ifthenelse{\equal{#4}{}}% infinite or finite credits?
+            {\expandafter\let\csname ALG at c@\ALG at Ld @#3@\ALG at getblocknumber{#2}\endcsname\relax}% infinite credits
+            {\expandafter\edef\csname ALG at c@\ALG at Ld @#3@\ALG at getblocknumber{#2}\endcsname{#4}}% finite credits
+         \ifthenelse{\equal{#5}{}}% default or specified indentation
+            {\expandafter\let\csname ALG at i@\ALG at Ld @#3@\ALG at getblocknumber{#2}\endcsname\ALG at defaultindent}% default indentation
+            {\expandafter\edef\csname ALG at i@\ALG at Ld @#3@\ALG at getblocknumber{#2}\endcsname{#5}}% indentation is specified
+         }%
+      \ALG at createtext{\ALG at getblocknumber{#2}}{#3}%
+      }%
+   }%
+%
+%   macros used in declarations
+%
+\def\ALG at p@endtext at E{\algrenewtext{\ALG at v@end}}%
+\def\ALG at p@endtext at xE{\algrenewtext[\ALG at v@newblock]{\ALG at v@end}}%
+\def\ALG at p@endtext at nE{\algnotext{\ALG at v@end}}%
+\def\ALG at p@endtext at xnE{\algnotext[\ALG at v@newblock]{\ALG at v@end}}%
+\def\ALG at p@endtext@{}%
+% starttext definitions are more complex -- care must be taken with the optional parameters
+\def\ALG at p@starttext at S{\ALG at p@s at process{\algrenewtext}}%
+\def\ALG at p@starttext at C{\ALG at p@s at process{\algrenewtext}}%
+\def\ALG at p@starttext at xC{\ALG at p@s at process{\algrenewtext[\ALG at v@oldblock]}}%
+\def\ALG at p@s at process#1%
+   {%
+   \ifthenelse{\equal{\ALG at v@start}{}}%
+      {\ALG at p@endtext}%
+      {\@ifnextchar{[}{\ALG at p@s at getparamcount{#1}}{\ALG at p@s at simple{#1}}}%
+   }%
+\def\ALG at p@s at getparamcount#1[#2]%
+   {%
+   \@ifnextchar{[}{\ALG at p@s at getdefparam{#1}{#2}}{\ALG at p@s at param{#1}{#2}}%
+   }%
+\def\ALG at p@s at getdefparam#1#2[#3]%
+   {%
+   \ALG at p@s at defparam{#1}{#2}{#3}%
+   }%
+\def\ALG at p@s at simple#1#2{#1{\ALG at v@start}{#2}\ALG at p@endtext}%
+\def\ALG at p@s at param#1#2#3{#1{\ALG at v@start}[#2]{#3}\ALG at p@endtext}%
+\def\ALG at p@s at defparam#1#2#3#4{#1{\ALG at v@start}[#2][#3]{#4}\ALG at p@endtext}%
+% the rest of the crew
+\def\ALG at p@starttext at nS{\algnotext{\ALG at v@start}\ALG at p@endtext}%
+\def\ALG at p@starttext at nC{\algnotext{\ALG at v@start}\ALG at p@endtext}%
+\def\ALG at p@starttext at xnC{\algnotext[\ALG at v@oldblock]{\ALG at v@start}\ALG at p@endtext}%
+\def\ALG at p@starttext@{\ALG at p@endtext}%
+\def\ALG at p@indent at def#1{\def\ALG at v@indent{#1}\ALG at p@setup}%
+\def\ALG at p@indent@{\def\ALG at v@indent{}\ALG at p@setup}%
+\def\ALG at p@credits at def#1{\def\ALG at v@credits{#1}\ALG at p@indent}%
+\def\ALG at p@credits@{\ALG at p@indent}%
+\def\ALG at p@end at def#1{\def\ALG at v@end{#1}\ALG at p@credits}%
+\def\ALG at p@end@{\def\ALG at v@end{}\ALG at p@credits}%
+\def\ALG at p@start at def#1{\def\ALG at v@start{#1}\ALG at p@end}%
+\def\ALG at p@start@{\def\ALG at v@start{}\ALG at p@end}%
+\def\ALG at p@oldblock at def#1{\def\ALG at v@oldblock{#1}\ALG at p@start}%
+\def\ALG at p@oldblock@{\def\ALG at v@oldblock{}\ALG at p@start}%
+\newcommand\ALG at p@newblock[1][]{\def\ALG at v@newblock{#1}\ALG at p@oldblock}%
+\def\ALG at p@setup%
+   {%
+   \ifthenelse{\equal{\ALG at v@newblock}{}}%
+      {%
+      \ifthenelse{\equal{\ALG at v@start}{}}%
+         {%
+            \PackageError{algorithmicx}{Block or starting entitie must be specified!!!}{}%
+         }%
+         {%
+            \let\ALG at v@newblock\ALG at v@start%
+         }%
+      }%
+      {%
+      }%
+   \ALG at endandcontinueblock%
+      {\ALG at v@newblock}{\ALG at v@oldblock}{\ALG at v@start}%
+      {\ALG at v@credits}{\ALG at v@indent}%
+   \ALG at endandcontinueblock%
+      {}{\ALG at v@newblock}{\ALG at v@end}%
+      {}{}%
+   \ALG at p@starttext%
+   }%
+%
+%   param handling
+%
+\newcommand\ALG at p@def[2][def]%
+   {%
+   \expandafter\let\csname ALG at p@#2\expandafter\endcsname\csname ALG at p@#2@#1\endcsname%
+   }%
+\def\ALG at p@undef{\ALG at p@def[]}%
+%
+\def\ALG at p@ons{\ALG at p@def{start}}%
+\def\ALG at p@onS{\ALG at p@def{start}\ALG at p@def[S]{starttext}}%
+\def\ALG at p@onc{\ALG at p@def{oldblock}\ALG at p@def{start}}%
+\def\ALG at p@onC{\ALG at p@def{oldblock}\ALG at p@def{start}\ALG at p@def[C]{starttext}}%
+\def\ALG at p@one{\ALG at p@def{end}}%
+\def\ALG at p@onE{\ALG at p@def{end}\ALG at p@def[E]{endtext}}%
+\def\ALG at p@onxC{\ALG at p@def{oldblock}\ALG at p@def{start}\ALG at p@def[xC]{starttext}}%
+\def\ALG at p@onxE{\ALG at p@def{end}\ALG at p@def[xE]{endtext}}%
+\def\ALG at p@onnS{\ALG at p@def{start}\ALG at p@def[nS]{starttext}}%
+\def\ALG at p@onnC{\ALG at p@def{oldblock}\ALG at p@def{start}\ALG at p@def[nC]{starttext}}%
+\def\ALG at p@onnE{\ALG at p@def{end}\ALG at p@def[nE]{endtext}}%
+\def\ALG at p@onxnC{\ALG at p@def{oldblock}\ALG at p@def{start}\ALG at p@def[xnC]{starttext}}%
+\def\ALG at p@onxnE{\ALG at p@def{end}\ALG at p@def[xnE]{endtext}}%
+\def\ALG at p@onb{\def\ALG at v@credits{}}%
+\def\ALG at p@onl{\def\ALG at v@credits{1}}%
+\def\ALG at p@onL{\ALG at p@def{credits}}%
+\def\ALG at p@oni{\ALG at p@def{indent}}%
+%
+\def\ALG at p@main#1%
+   {%
+   \@ifundefined{ALG at ps@\ALG at p@state @#1}%
+      {%
+      \csname ALG at ps@\ALG at p@state @other\endcsname{#1}%
+      }%
+      {%
+      \csname ALG at ps@\ALG at p@state @#1\endcsname%
+      }%
+   \ALG at p@rec%
+   }%
+% STATE : <<starting state>>
+\expandafter\def\csname ALG at ps@@]\endcsname{\let\ALG at p@rec\relax}%
+\def\ALG at ps@@s{\ALG at p@ons}%
+\def\ALG at ps@@S{\ALG at p@onS}%
+\def\ALG at ps@@c{\ALG at p@onc}%
+\def\ALG at ps@@C{\ALG at p@onC}%
+\def\ALG at ps@@e{\ALG at p@one}%
+\def\ALG at ps@@E{\ALG at p@onE}%
+\def\ALG at ps@@N{\typeout{algdef: 'N' obsoloted, use 'nE'.}\ALG at p@onnE}%
+\def\ALG at ps@@b{\ALG at p@onb}%
+\def\ALG at ps@@l{\ALG at p@onl}%
+\def\ALG at ps@@L{\ALG at p@onL}%
+\def\ALG at ps@@i{\ALG at p@oni}%
+\def\ALG at ps@@x{\def\ALG at p@state{x}}%
+\def\ALG at ps@@n{\def\ALG at p@state{n}}%
+\def\ALG at ps@@other#1{\typeout{algdef: Ignoring unknown token #1}}%
+% STATE : x  (an 'x' prefix was read)
+\def\ALG at ps@x at C{\def\ALG at p@state{}\ALG at p@onxC}%
+\def\ALG at ps@x at E{\def\ALG at p@state{}\ALG at p@onxE}%
+\def\ALG at ps@x at N{\def\ALG at p@state{}\typeout{algdef: 'xN' obsoloted, use 'xnE'.}\ALG at p@onxnE}%
+\def\ALG at ps@x at n{\def\ALG at p@state{xn}}%
+\def\ALG at ps@x at other#1% any other token: report, drop the prefix and parse #1 again
+   {%
+   \typeout{algdef: Ignoring 'x' before '#1'.}%
+   \def\ALG at p@state{}%
+   \def\ALG at p@rec{\let\ALG at p@rec\ALG at p@main\ALG at p@rec#1}% one-shot: restore the normal parser and feed #1 to it
+   }%
+% STATE : n  (an 'n' prefix was read)
+\def\ALG at ps@n at S{\def\ALG at p@state{}\ALG at p@onnS}%
+\def\ALG at ps@n at C{\def\ALG at p@state{}\ALG at p@onnC}%
+\def\ALG at ps@n at E{\def\ALG at p@state{}\ALG at p@onnE}%
+\def\ALG at ps@n at x{\def\ALG at p@state{nx}}%
+\def\ALG at ps@n at other#1% any other token: report, drop the prefix and parse #1 again
+   {%
+   \typeout{algdef: Ignoring 'n' before '#1'.}%
+   \def\ALG at p@state{}%
+   \def\ALG at p@rec{\let\ALG at p@rec\ALG at p@main\ALG at p@rec#1}% one-shot: restore the normal parser and feed #1 to it
+   }%
+% STATE : xn  ('x' then 'n' were read)
+\def\ALG at ps@xn at C{\def\ALG at p@state{}\ALG at p@onxnC}%
+\def\ALG at ps@xn at E{\def\ALG at p@state{}\ALG at p@onxnE}%
+\def\ALG at ps@xn at x{\typeout{algdef: Ignoring 'x' after 'xn'.}}% the state stays 'xn'
+\def\ALG at ps@xn at n{\typeout{algdef: Ignoring 'n' after 'xn'.}}% the state stays 'xn'
+\def\ALG at ps@xn at other#1% any other token: report, drop the prefix and parse #1 again
+   {%
+   \typeout{algdef: Ignoring 'xn' before '#1'.}%
+   \def\ALG at p@state{}%
+   \def\ALG at p@rec{\let\ALG at p@rec\ALG at p@main\ALG at p@rec#1}% one-shot: restore the normal parser and feed #1 to it
+   }%
+% STATE : nx  ('n' then 'x' were read; C and E trigger the same actions as in state xn)
+\def\ALG at ps@nx at C{\def\ALG at p@state{}\ALG at p@onxnC}%
+\def\ALG at ps@nx at E{\def\ALG at p@state{}\ALG at p@onxnE}%
+\def\ALG at ps@nx at x{\typeout{algdef: Ignoring 'x' after 'nx'.}}% the state stays 'nx'
+\def\ALG at ps@nx at n{\typeout{algdef: Ignoring 'n' after 'nx'.}}% the state stays 'nx'
+\def\ALG at ps@nx at other#1% any other token: report, drop the prefix and parse #1 again
+   {%
+   \typeout{algdef: Ignoring 'nx' before '#1'.}%
+   \def\ALG at p@state{}%
+   \def\ALG at p@rec{\let\ALG at p@rec\ALG at p@main\ALG at p@rec#1}% one-shot: restore the normal parser and feed #1 to it
+   }%
+%
+%
+%   ***   User level block/entitie commands   ***
+%
+%
+%
+%   algdef{switches}... -- the king of all definitions in the algorithmicx package
+%
+\newcommand\algdef[1]% #1 = switch list, parsed token by token
+   {%
+   \ALG at p@undef{oldblock}% \ALG at p@undef is defined elsewhere; presumably clears what an earlier \algdef left behind
+   \ALG at p@undef{start}%
+   \ALG at p@undef{end}%
+   \def\ALG at v@credits{}%
+   \ALG at p@undef{credits}%
+   \ALG at p@undef{indent}%
+   \ALG at p@undef{starttext}%
+   \ALG at p@undef{endtext}%
+   \def\ALG at p@state{}% the parser begins in the starting state
+   \let\ALG at p@rec\ALG at p@main%
+   \ALG at p@rec#1]% run the parser over the switches; the appended ']' stops it
+   \ALG at p@newblock% defined elsewhere; presumably reads the remaining \algdef arguments
+   }%
+%
+%   a lot of other macros are provided for convenience;
+%   each of them is \algdef with a fixed switch list
+\def\algblock{\algdef{se}}%
+\def\algcblock{\algdef{ce}}%
+\def\algloop{\algdef{sl}}%
+\def\algcloop{\algdef{cl}}%
+\def\algsetblock{\algdef{seLi}}%
+\def\algsetcblock{\algdef{ceLi}}%
+\def\algblockx{\algdef{SxE}}%
+\def\algblockdefx{\algdef{SE}}%
+\def\algcblockx{\algdef{CxE}}%
+\def\algcblockdefx{\algdef{CE}}%
+\def\algsetblockx{\algdef{SxELi}}%
+\def\algsetblockdefx{\algdef{SELi}}%
+\def\algsetcblockx{\algdef{CxELi}}%
+\def\algsetcblockdefx{\algdef{CELi}}%
+\def\algloopdefx{\algdef{Sl}}%
+\def\algcloopx{\algdef{xCl}}%
+\def\algcloopdefx{\algdef{Cl}}%
+% algloopx is not correct, use algloopdefx
+%
+%   Text output commands
+%
+\newcommand\algrenewtext[2][]% [block]{entity} -- give an entity a new output text
+   {%
+   \ifthenelse{\equal{#2}{}}{}% an empty entity name does nothing
+      {%
+      \ifthenelse{\equal{#1}{}}%
+         {% no block given: the text macro is ALG at t@<\ALG at Ld>@<entity>
+         \expandafter\let\csname ALG at t@\ALG at Ld @#2\endcsname\relax% set to \relax first so that \newcommand accepts the name
+         \expandafter\newcommand\csname ALG at t@\ALG at Ld @#2\endcsname% no body here: it must follow the \algrenewtext call
+         }%
+         {% block given: the macro name additionally ends in @<\ALG at getblocknumber{block}>
+         \expandafter\let\csname ALG at t@\ALG at Ld @#2@\ALG at getblocknumber{#1}\endcsname\relax%
+         \expandafter\newcommand\csname ALG at t@\ALG at Ld @#2@\ALG at getblocknumber{#1}\endcsname%
+         }%
+      }%
+   }%
+%
+\def\ALG at letentitytext#1#2% {block}{entity} -- both arguments are mandatory here; #1 may be empty
+   {%
+   \ifthenelse{\equal{#2}{}}{}% an empty entity name does nothing
+      {%
+      \ifthenelse{\equal{#1}{}}%
+         {% no block: \let ALG at t@<\ALG at Ld>@<entity> to the token that follows the call
+         \expandafter\let\csname ALG at t@\ALG at Ld @#2\endcsname%
+         }%
+         {% with block: same, for the macro whose name ends in @<block number>
+         \expandafter\let\csname ALG at t@\ALG at Ld @#2@\ALG at getblocknumber{#1}\endcsname%
+         }%
+      }%
+   }%
+%
+\newcommand\algnotext[2][]% [block]{entity}
+   {%
+   \ALG at letentitytext{#1}{#2}\ALG at x@notext% \let the entity's text macro to \ALG at x@notext
+   }%
+%
+\newcommand\algdefaulttext[2][]% [block]{entity}
+   {%
+   \ALG at letentitytext{#1}{#2}\ALG at x@default% \let the entity's text macro to \ALG at x@default
+   }%
+%
+\def\ALG at notext*{\algnotext}% removes the '*' of \algtext* and continues with \algnotext
+\def\algtext{\@ifnextchar{*}{\ALG at notext}{\algrenewtext}}% \algtext* acts as \algnotext, plain \algtext as \algrenewtext
+%
+%
+%   ***   LANGUAGE SWITCHING   ***
+%
+%
+%
+\newcommand\algnewlanguage[1]% #1 = name of the language to create
+   {%
+   \@ifundefined{ALG at L@#1}% is the name still unused?
+      {}%
+      {% no: report it (the definitions below are executed nevertheless)
+      \PackageError{algorithmicx}{Language '#1' already defined!}{}%
+      }%
+   \addtocounter{ALG at Lnr}{1}% increment the language counter
+   \expandafter\edef\csname ALG at L@#1\endcsname{\arabic{ALG at Lnr}}% set the language number
+   \edef\ALG at Ld{\csname ALG at L@#1\endcsname}% new definitions now go to this language
+   \expandafter\let\csname ALG at bl@\ALG at Ld @\endcsname\ALG at bl@% the BIG block
+   \expandafter\let\csname ALG at bl@\ALG at Ld @@\endcsname\ALG at bl@% the BIG block
+   \algdef{SL}[STATE]{State}{0}{}% every language gets the \State entity
+   \expandafter\def\csname ALG at deftext@\ALG at Ld\endcsname{\textbf}%
+   \algnewcommand\algorithmiccomment[1]{\hfill\(\triangleright\) ##1}% per-language defaults, changeable with \algrenewcommand
+   \algnewcommand\algorithmicindent{1.5em}%
+   \algnewcommand\alglinenumber[1]{\footnotesize ##1:}%
+   \algnewcommand\ALG at beginalgorithmic\relax% for user overrides
+   \algnewcommand\ALG at endalgorithmic\relax% for user overrides
+   }%
+%
+\newcommand\algsetlanguage[1]% sets \ALG at L, the language that language dependent commands resolve to when used
+   {%
+   \@ifundefined{ALG at L@#1}% the language must already exist
+      {%
+      \PackageError{algorithmicx}{Language '#1' is not yet defined!}{}%
+      }{}%
+   \edef\ALG at L{\csname ALG at L@#1\endcsname}%
+   }%
+%
+\newcommand\algdeflanguage[1]% sets \ALG at Ld, the language that new definitions are stored under
+   {%
+   \@ifundefined{ALG at L@#1}% the language must already exist
+      {%
+      \PackageError{algorithmicx}{Language '#1' is not yet defined!}{}%
+      }{}%
+   \edef\ALG at Ld{\csname ALG at L@#1\endcsname}%
+   }%
+%
+\newcommand\alglanguage[1]% sets both \ALG at Ld and \ALG at L to language #1
+   {%
+   \algdeflanguage{#1}%
+   \algsetlanguage{#1}%
+   }%
+%
+%
+%   ***   Defining language dependent stuff   ***
+%
+%
+\def\ALG at eatoneparam#1{}% discards one token
+\def\ALG at defbasecmd#1#2% #1 = defining command (\newcommand, \def, \let, ...), #2 = control sequence to define
+   {%
+   \edef\ALG at tmp{\expandafter\ALG at eatoneparam\string #2}% the name of #2 with the first character of its \string form removed
+   \@ifundefined\ALG at tmp{\edef #2{\noexpand\csname ALG at cmd@\noexpand\ALG at L @\ALG at tmp\endcsname}}{}% only if #2 is still undefined: make it forward to the macro of language \ALG at L
+   \expandafter#1\csname ALG at cmd@\ALG at Ld @\ALG at tmp\endcsname% apply #1 to the macro of language \ALG at Ld; the definition itself follows the call
+   }%
+\newcommand\algnewcommand{\ALG at defbasecmd\newcommand}%
+\newcommand\algrenewcommand{\ALG at defbasecmd\renewcommand}%
+\def\ALG at letcmd{\ALG at defbasecmd\let}%
+\def\ALG at defcmd{\ALG at defbasecmd\def}%
+\def\ALG at edefcmd{\ALG at defbasecmd\edef}%
+%
+%
+%   ***   OTHERS   ***
+%
+%
+\def\BState{\State \algbackskip}% \State followed by \algbackskip
+\def\Statex{\item[]}% an empty line
+\newcommand\algrenewcomment{\algrenewcommand\algorithmiccomment}% shorthand for redefining \algorithmiccomment
+\def\Comment{\algorithmiccomment}%
+\def\algref#1#2{\ref{#1}.\ref{#2}}% prints the two references separated by a dot
+\algnewlanguage{default}% create the language 'default' ...
+\algsetlanguage{default}% ... and select it via \algsetlanguage
+%
+%
+%   ***   Line breaks   ***
+%
+%
+\newcommand\algbreak% for multiline parameters !!! needs fix
+   {%
+      \item% only an \item is issued; the two skips below are commented out
+%      \hskip\ALG at parindent%!!! not yet implemented
+%      \hskip-\algorithmicindent%
+   }%
+%
+\def\ALG at noputindents% inserts a horizontal skip of \ALG at tlm
+   {%
+   \hskip\ALG at tlm%
+   }%
+%
+%
+%   ***   algorithm store / restore   ***
+%
+%
+%   store
+%
+\ALG at newcondstate{mustrestore}%
+\def\algstore% \algstore{name} or \algstore*{name}
+   {%
+   \renewcommand\ALG at beginblock% from now on any use of \ALG at beginblock raises an error
+      {%
+      \PackageError{algorithmicx}{The environment must be closed after store!}{}%
+      }%
+   \@ifstar{\ALG at starstore}{\ALG at nostarstore}%
+   }%
+\def\ALG at nostarstore#1% save all infos into #1 and terminate the algorithmic block
+   {%
+   \addtocounter{ALG at storecount}{1}% decremented on restore; checked for >0 at the end of the document
+   \expandafter\global\expandafter\let\csname ALG at save@mustrestore@#1\endcsname\ALG at x@mustrestore% marks the save as counted
+   \ALG at starstore{#1}%
+   }%
+\def\ALG at starstore#1% the saving itself, without touching ALG at storecount
+   {%
+   \@ifundefined{ALG at save@line@#1}{}%
+       {\PackageError{algorithmicx}{This save name '#1' is already used!}{}}%
+   \def\ALG at savename{#1}%
+   \expandafter\xdef\csname ALG at save@totalnr@\ALG at savename\endcsname{\theALG at nested}% value of counter ALG at nested
+   \expandafter\xdef\csname ALG at save@line@\ALG at savename\endcsname{\theALG at line}% value of counter ALG at line
+   \expandafter\xdef\csname ALG at save@numberfreq@\ALG at savename\endcsname{\ALG at numberfreq}%
+   \expandafter\xdef\csname ALG at save@rem@\ALG at savename\endcsname{\theALG at rem}% value of counter ALG at rem
+   \let\ALG at storerepeat\ALG at store%
+   \ALG at storerepeat% loop of \ALG at store until counter ALG at nested is 0
+   }%
+\def\ALG at store% simply terminate all open blocks
+   {%
+   \ifnum\theALG at nested=0\let\ALG at storerepeat\relax% counter ALG at nested is 0: end the loop
+   \else%
+      \expandafter\xdef\csname ALG at save@currentblock@\ALG at savename @\theALG at nested\endcsname% save the current block of this level
+         {\csname ALG at currentblock@\theALG at nested\endcsname}%
+      \expandafter\ifx\csname ALG at currentlifetime@\theALG at nested\endcsname\relax%
+      \else%
+         \expandafter\xdef\csname ALG at save@currentlifetime@\ALG at savename @\theALG at nested\endcsname% save the lifetime only when it is not \relax
+            {\csname ALG at currentlifetime@\theALG at nested\endcsname}%
+      \fi%
+      \expandafter\xdef\csname ALG at save@ind@\ALG at savename @\theALG at nested\endcsname% save the 'ind' value of this level
+         {\csname ALG at ind@\theALG at nested\endcsname}%
+      \ALG at closebyforce% defined elsewhere; presumably closes the innermost block and lowers ALG at nested
+   \fi%
+   \ALG at storerepeat%
+   }%
+%
+%   restore
+%
+\def\algrestore% \algrestore{name} or \algrestore*{name}
+   {%
+   \@ifstar{\ALG at starrestore}{\ALG at nostarrestore}%
+   }%
+\def\ALG at starrestore% starred form: the two removal hooks do nothing, the saved macros stay
+   {%
+   \let\ALG at restorerem\relax%
+   \let\ALG at restorereprem\relax%
+   \ALG at restoremain%
+   }%
+\def\ALG at nostarrestore% unstarred form: the saved macros are reset while restoring
+   {%
+   \let\ALG at restorerem\ALG at restoreremovesave%
+   \let\ALG at restorereprem\ALG at restorerepremovesave%
+   \ALG at restoremain%
+   }%
+\def\ALG at restoreremovesave% globally \let the per-save macros of \ALG at savename to \relax
+   {%
+   \expandafter\global\expandafter\let\csname ALG at save@totalnr@\ALG at savename\endcsname\relax%
+   \expandafter\global\expandafter\let\csname ALG at save@line@\ALG at savename\endcsname\relax%
+   \expandafter\global\expandafter\let\csname ALG at save@rem@\ALG at savename\endcsname\relax%
+   \expandafter\global\expandafter\let\csname ALG at save@totalnr@\ALG at savename\endcsname\relax% repeats the totalnr reset above (redundant, harmless)
+   \expandafter\global\expandafter\let\csname ALG at save@numberfreq@\ALG at savename\endcsname\relax%
+   }%
+\def\ALG at restorerepremovesave% globally \let the per-level macros of level \theALG at tmpcounter to \relax
+   {%
+   \expandafter\global\expandafter\let\csname ALG at save@currentblock@\ALG at savename @\theALG at tmpcounter\endcsname\relax%
+   \expandafter\global\expandafter\let\csname ALG at save@currentlifetime@\ALG at savename @\theALG at tmpcounter\endcsname\relax%
+   \expandafter\global\expandafter\let\csname ALG at save@currentlifetime@\ALG at savename @\theALG at tmpcounter\endcsname\relax% repeats the previous line (redundant, harmless)
+   \expandafter\global\expandafter\let\csname ALG at save@ind@\ALG at savename @\theALG at tmpcounter\endcsname\relax%
+   }%
+\def\ALG at restoremain#1% restore all infos from #1 in an open algorithmic block
+   {%
+   \ifnum\theALG at line=0%
+      \else\PackageError{algorithmicx}{Restore might be used only at the beginning of the environment!}{}%
+   \fi%
+   \def\ALG at savename{#1}%
+   \expandafter\ifx\csname ALG at save@totalnr@\ALG at savename\endcsname\relax% no (or no longer any) save under this name
+      \PackageError{algorithmicx}{Save '\ALG at savename'\space not defined!!!}{}%
+   \fi%
+   \@ifundefined{ALG at save@mustrestore@\ALG at savename}{}%
+      {% the save was counted by the unstarred \algstore: uncount it and clear the mark
+      \addtocounter{ALG at storecount}{-1}%
+      \expandafter\global\expandafter\let\csname ALG at save@mustrestore@\ALG at savename\endcsname\relax%
+      }%
+   \setcounter{ALG at line}{\csname ALG at save@line@\ALG at savename\endcsname}%
+   \edef\ALG at numberfreq{\csname ALG at save@numberfreq@\ALG at savename\endcsname}%
+   \setcounter{ALG at rem}{\csname ALG at save@rem@\ALG at savename\endcsname}%
+   \setcounter{ALG at tmpcounter}{\csname ALG at save@totalnr@\ALG at savename\endcsname}% number of saved levels to go through
+   \setcounter{ALG at nested}{0}%
+   \ALG at restorerem% reset the per-save macros (does nothing for \algrestore*)
+   \let\ALG at restorerepeat\ALG at restore%
+   \ALG at restorerepeat% loop of \ALG at restore until ALG at tmpcounter reaches 0
+   }%
+\def\ALG at restore% one loop step: restores level \theALG at tmpcounter, then counts down
+   {%
+   \ifnum\theALG at tmpcounter>0%
+      \expandafter\edef\csname ALG at currentblock@\theALG at tmpcounter\endcsname% current block of this level, from the save
+         {\csname ALG at save@currentblock@\ALG at savename @\theALG at tmpcounter\endcsname}%
+      \expandafter\ifx\csname ALG at save@currentlifetime@\ALG at savename @\theALG at tmpcounter\endcsname\relax% no lifetime was saved
+         \expandafter\let\csname ALG at currentlifetime@\theALG at tmpcounter\endcsname\relax%
+         \else%
+            \expandafter\edef\csname ALG at currentlifetime@\theALG at tmpcounter\endcsname%
+            {\csname ALG at save@currentlifetime@\ALG at savename @\theALG at tmpcounter\endcsname}%
+         \fi%
+      %
+      \ALG at beginblock{\csname ALG at save@ind@\ALG at savename @\theALG at tmpcounter\endcsname}% called with the saved 'ind' value of this level
+      \ALG at restorereprem% reset the per-level macros (does nothing for \algrestore*)
+      \addtocounter{ALG at tmpcounter}{-1}%
+   \else\let\ALG at restorerepeat\relax% all levels done: end the loop
+   \fi%
+   \ALG at restorerepeat%
+   }%
+\AtEndDocument% final check on counter ALG at storecount
+   {%
+   \ifnum\theALG at storecount>0\relax% raised by unstarred \algstore, lowered when such a save is restored
+      \PackageError{algorithmicx}{Some stored algorithms are not restored!}{}%
+   \fi%
+   }%
diff --git a/docs/methods/primary/packages/algpseudocode.sty b/docs/methods/primary/packages/algpseudocode.sty
new file mode 100644
index 0000000..fca966a
--- /dev/null
+++ b/docs/methods/primary/packages/algpseudocode.sty
@@ -0,0 +1,92 @@
+% PSEUDOCODE ALGORITHMIC STYLE -- Released 27 APR 2005
+%    for LaTeX version 2e
+%
+% Copyright Szasz Janos
+% E-mail szaszjanos at users.sourceforge.net
+% Based on Peter Williams's algorithmic.sty
+%
+\NeedsTeXFormat{LaTeX2e}%
+\ProvidesPackage{algpseudocode}%
+\RequirePackage{ifthen}%
+\RequirePackage{algorithmicx}%
+\typeout{Document Style - pseudocode environments for use with the `algorithmicx' style}%
+%
+\def\ALG at noend{f}% 't' when option noend is given, 'f' otherwise; tested with \equal further below
+\newboolean{ALG at compatible}% true when option compatible is given
+\setboolean{ALG at compatible}{false}%
+%
+\DeclareOption{noend}{\def\ALG at noend{t}}%
+\DeclareOption{end}{\def\ALG at noend{f}}%
+\DeclareOption{compatible}{\typeout{For compatibility mode use algcompatible.sty!!!}\setboolean{ALG at compatible}{true}}%
+\DeclareOption{noncompatible}{\setboolean{ALG at compatible}{false}}% fixed: used to set ALG at noncompatible, a boolean that is never declared
+\ProcessOptions%
+%
+%      ***      DECLARATIONS      ***
+%
+\algnewlanguage{pseudocode}% create the language 'pseudocode' ...
+\alglanguage{pseudocode}% ... and select it for new definitions as well as for use
+%
+%      ***      KEYWORDS      ***
+% defined with \algnewcommand, i.e. stored under the language selected above
+\algnewcommand\algorithmicend{\textbf{end}}
+\algnewcommand\algorithmicdo{\textbf{do}}
+\algnewcommand\algorithmicwhile{\textbf{while}}
+\algnewcommand\algorithmicfor{\textbf{for}}
+\algnewcommand\algorithmicforall{\textbf{for all}}
+\algnewcommand\algorithmicloop{\textbf{loop}}
+\algnewcommand\algorithmicrepeat{\textbf{repeat}}
+\algnewcommand\algorithmicuntil{\textbf{until}}
+\algnewcommand\algorithmicprocedure{\textbf{procedure}}
+\algnewcommand\algorithmicfunction{\textbf{function}}
+\algnewcommand\algorithmicif{\textbf{if}}
+\algnewcommand\algorithmicthen{\textbf{then}}
+\algnewcommand\algorithmicelse{\textbf{else}}
+\algnewcommand\algorithmicrequire{\textbf{Require:}}
+\algnewcommand\algorithmicensure{\textbf{Ensure:}}
+\algnewcommand\algorithmicreturn{\textbf{return}}
+\algnewcommand\textproc{\textsc}
+%
+%      ***      DECLARED LOOPS      ***
+%
+\algdef{SE}[WHILE]{While}{EndWhile}[1]{\algorithmicwhile\ #1\ \algorithmicdo}{\algorithmicend\ \algorithmicwhile}% \While{cond} ... \EndWhile
+\algdef{SE}[FOR]{For}{EndFor}[1]{\algorithmicfor\ #1\ \algorithmicdo}{\algorithmicend\ \algorithmicfor}% \For{range} ... \EndFor
+\algdef{S}[FOR]{ForAll}[1]{\algorithmicforall\ #1\ \algorithmicdo}% start entity only, attached to the FOR block declared above
+\algdef{SE}[LOOP]{Loop}{EndLoop}{\algorithmicloop}{\algorithmicend\ \algorithmicloop}% \Loop ... \EndLoop
+\algdef{SE}[REPEAT]{Repeat}{Until}{\algorithmicrepeat}[1]{\algorithmicuntil\ #1}% here the parameter belongs to the end entity: \Until{cond}
+\algdef{SE}[IF]{If}{EndIf}[1]{\algorithmicif\ #1\ \algorithmicthen}{\algorithmicend\ \algorithmicif}% \If{cond} ... \EndIf
+\algdef{C}[IF]{IF}{ElsIf}[1]{\algorithmicelse\ \algorithmicif\ #1\ \algorithmicthen}% \ElsIf{cond}: declared with old block IF and new block IF
+\algdef{Ce}[ELSE]{IF}{Else}{EndIf}{\algorithmicelse}% \Else: declared with old block IF and new block ELSE, end entity EndIf
+\algdef{SE}[PROCEDURE]{Procedure}{EndProcedure}%
+   [2]{\algorithmicprocedure\ \textproc{#1}\ifthenelse{\equal{#2}{}}{}{(#2)}}% the parenthesised parameter list is omitted when #2 is empty
+   {\algorithmicend\ \algorithmicprocedure}%
+\algdef{SE}[FUNCTION]{Function}{EndFunction}%
+   [2]{\algorithmicfunction\ \textproc{#1}\ifthenelse{\equal{#2}{}}{}{(#2)}}% the parenthesised parameter list is omitted when #2 is empty
+   {\algorithmicend\ \algorithmicfunction}%
+%
+\ifthenelse{\equal{\ALG at noend}{t}}% option noend: apply \algtext* to every end entity declared above
+   {%
+   \algtext*{EndWhile}%
+   \algtext*{EndFor}%
+   \algtext*{EndLoop}%
+   \algtext*{EndIf}%
+   \algtext*{EndProcedure}%
+   \algtext*{EndFunction}%
+   }{}%
+%
+%      ***      OTHER DECLARATIONS      ***
+%
+\algnewcommand\Require{\item[\algorithmicrequire]}% an \item labelled with the 'Require:' keyword
+\algnewcommand\Ensure{\item[\algorithmicensure]}% an \item labelled with the 'Ensure:' keyword
+\algnewcommand\Return{\algorithmicreturn{} }% the 'return' keyword followed by a space
+\algnewcommand\Call[2]{\textproc{#1}\ifthenelse{\equal{#2}{}}{}{(#2)}}% name in \textproc, then (#2) unless #2 is empty
+%
+%
+%
+\ifthenelse{\boolean{ALG at compatible}}% option compatible: additionally load algcompatible
+   {%
+   \ifthenelse{\equal{\ALG at noend}{t}}% pass the noend option on when it was given
+      {\RequirePackage[noend]{algcompatible}}%
+      {\RequirePackage{algcompatible}}%
+   }%
+   {}%
+%
diff --git a/docs/userGuide/README.md b/docs/userGuide/README.md
new file mode 100644
index 0000000..1f47865
--- /dev/null
+++ b/docs/userGuide/README.md
@@ -0,0 +1,679 @@
+Manta User Guide
+================
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Introduction](#introduction)
+* [Installation](#installation)
+* [Method Overview](#method-overview)
+* [Capabilities](#capabilities)
+  * [Detected variant classes](#detected-variant-classes)
+  * [Known Limitations](#known-limitations)
+* [Input requirements](#input-requirements)
+* [Outputs](#outputs)
+  * [Structural Variant predictions](#structural-variant-predictions)
+  * [Manta VCF reporting format](#manta-vcf-reporting-format)
+    * [VCF Sample Names](#vcf-sample-names)
+    * [Small indels](#small-indels)
+    * [Insertions with incomplete insert sequence assembly](#insertions-with-incomplete-insert-sequence-assembly)
+    * [VCF INFO Fields](#vcf-info-fields)
+    * [VCF FORMAT Fields](#vcf-format-fields)
+    * [VCF FILTER Fields](#vcf-filter-fields)
+    * [What do the values in Manta's VCF ID field mean?](#what-do-the-values-in-mantas-vcf-id-field-mean)
+    * [Converting Manta VCF to BEDPE format](#converting-manta-vcf-to-bedpe-format)
+  * [Statistics](#statistics)
+* [Runtime hardware requirements](#runtime-hardware-requirements)
+* [Run configuration and execution](#run-configuration-and-execution)
+  * [Configuration](#configuration)
+    * [Advanced configuration options](#advanced-configuration-options)
+  * [Execution](#execution)
+    * [Advanced execution options](#advanced-execution-options)
+  * [Extended use cases](#extended-use-cases)
+    * [Exome/Targeted](#exometargeted)
+    * [Unpaired tumor sample](#unpaired-tumor-sample)
+  * [RNA-Seq](#rna-seq)
+  * [High sensitivity calling](#high-sensitivity-calling)
+  * [De novo calling](#de-novo-calling)
+  * [Generating evidence bams](#generating-evidence-bams)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+## Introduction
+
+Manta calls structural variants (SVs) and indels from mapped
+paired-end sequencing reads. It is optimized for analysis of germline
+variation in small sets of individuals and somatic variation in
+tumor/normal sample pairs. Manta discovers, assembles and scores
+large-scale SVs, medium-sized indels and large insertions within a
+single efficient workflow. The method is designed for rapid analysis
+on standard compute hardware: NA12878 at 50x genomic coverage is
+analyzed in less than 20 minutes on a 20 core server, and most WGS
+tumor/normal analyses can be completed within 2 hours. Manta combines
+paired and split-read evidence during SV discovery and scoring to
+improve accuracy, but does not require split-reads or successful
+breakpoint assemblies to report a variant in cases where there is
+strong evidence otherwise. It provides scoring models for germline
+variants in small sets of diploid samples and somatic variants in
+matched tumor/normal sample pairs. There is experimental support for
+analysis of unmatched tumor samples as well (see details below). Manta
+accepts input read mappings from BAM or CRAM files and reports all SV
+and indel inferences in VCF 4.1 format.
+
+Methods and benchmarking details are described in:
+
+Chen, X. *et al.* (2016) Manta: rapid detection of structural variants and
+indels for germline and cancer sequencing applications. *Bioinformatics*,
+32, 1220-1222. [doi:10.1093/bioinformatics/btv710][bpaper]
+
+...and the corresponding [open-access pre-print][preprint].
+
+[bpaper]:https://dx.doi.org/10.1093/bioinformatics/btv710
+[preprint]:http://dx.doi.org/10.1101/024232
+
+
+## Installation
+
+Please see the [Manta installation instructions](installation.md)
+
+
+## Method Overview
+
+Manta divides the SV and indel discovery process into two primary
+steps: (1) scanning the genome to find SV associated regions and (2)
+analysis, scoring and output of SVs found in such regions.
+
+1. **Build breakend association graph** In this step the entire genome
+is scanned to discover evidence of possible SVs and large indels. This
+evidence is enumerated into a graph with edges connecting all regions
+of the genome which have a possible breakend association. Edges may
+connect two different regions of the genome to represent evidence of a
+long-range association, or an edge may connect a region to itself to
+capture a local indel/small SV association. Note that these
+associations are more general than a specific SV hypothesis, in that
+many breakend candidates may be found on one edge, although typically
+only one or two candidates are found per edge.
+
+2. **Analyze graph edges to find SVs** The second step is to analyze
+individual graph edges or groups of highly connected edges to discover
+and score SVs associated with the edge(s). The substeps of this
+process include inference of SV candidates associated with the edge,
+attempted assembly of the SV breakends, scoring/genotyping and
+filtration of the SV under various biological models (currently
+diploid germline and somatic), and finally, output to VCF.
+
+
+## Capabilities
+
+Manta is capable of detecting all structural variant types which are
+identifiable in the absence of copy number analysis and large-scale
+de-novo assembly. Detectable types are enumerated further below.
+
+For each structural variant and indel, Manta attempts to assemble the
+breakends to basepair resolution and report the left-shifted breakend
+coordinate (per the [VCF 4.1][1] SV reporting guidelines), together
+with any breakend homology sequence and/or inserted sequence between
+the breakends. It is often the case that the assembly will fail to
+provide a confident explanation of the data -- in such cases the
+variant will be reported as `IMPRECISE`, and scored according to the
+paired-end read evidence only.
+
+The sequencing reads provided as input to Manta are expected to be
+from a paired-end sequencing assay which results in an "innie"
+orientation between the two reads of each sequence fragment, each
+presenting a read from the outer edge of the fragment insert inward.
+
+Manta is primarily tested for whole-genome and whole-exome (or other
+targeted enrichment) sequencing assays on DNA. For these assays the
+following applications are supported:
+
+* Joint analysis of small sets of diploid individuals (where 'small' means
+family-scale -- roughly 10 or fewer samples)
+* Subtractive analysis of a matched tumor/normal sample pair
+* Analysis of an individual tumor sample
+
+For the first use case above, note that there is no specific restriction against
+using Manta for the joint analysis of larger cohorts, but this has not
+been extensively tested so there may be stability or call quality
+issues.
+
+Per the final use case above, tumor samples can be analyzed without a matched normal sample. In
+this case no scoring function is available, but the supporting
+evidence counts and many filters can still be usefully applied.
+
+RNA-Seq analysis is still in development and not fully supported. It
+can be configured with the `--rna` flag. This will adjust filtration
+levels and take other RNA-specific filtration and intron handling steps
+(more details are provided further below).
+
+### Detected variant classes
+
+Manta is able to detect all variation classes which can be explained
+as novel DNA adjacencies in the genome. Simple insertion/deletion
+events can be detected down to a configurable minimum size cutoff
+(defaulting to 8). All DNA adjacencies are classified into the
+following categories based on the breakend pattern:
+
+* Deletions
+* Insertions
+    * Fully-assembled insertions
+    * Partially-assembled (i.e. inferred) insertions
+* Inversions
+* Tandem Duplications
+* Interchromosomal Translocations
+
+### Known Limitations
+
+Manta is not expected to detect the following variant types:
+
+* Dispersed duplications
+* Most expansion/contraction variants of a reference tandem repeat
+* Small inversions
+    * The limiting size is not tested, but in theory detection falls off
+  below ~200 bases. So-called micro-inversions might be detected indirectly as
+  combined insertion/deletion variants.
+* Fully-assembled large insertions
+    * The maximum fully-assembled insertion size should correspond to
+  approximately twice the read-pair fragment size, but note that power to fully
+  assemble the insertion should fall off to impractical levels before this
+  size
+    * Note that Manta does detect and report very large insertions
+  when the breakend signature of such an event is found, even though
+  the inserted sequence cannot be fully assembled.
+
+More general repeat-based limitations exist for all variant types:
+
+* Power to assemble variants to breakend resolution falls to zero as
+  breakend repeat length approaches the read size.
+* Power to detect any breakend falls to (nearly) zero as the breakend
+  repeat length approaches the fragment size.
+
+Note that while Manta classifies novel DNA-adjacencies, it does not
+infer the higher level constructs implied by the classification. For
+instance, a variant marked as a deletion by Manta indicates an
+intrachromosomal translocation with a deletion-like breakend pattern,
+however there is no test of depth, b-allele frequency or intersecting
+adjacencies to directly infer the SV type.
+
+## Input requirements
+
+The sequencing reads provided as input to Manta are expected to be
+from a paired-end sequencing assay with an "innie" orientation between
+the two reads of each DNA fragment, each presenting a read from the
+outer edge of the fragment insert inward.
+
+Manta can tolerate non-paired reads in the input, so long as
+sufficient paired-end reads exist to estimate the paired fragment size
+distribution. Non-paired reads will still be used in discovery,
+assembly and split-read scoring if their alignments (or SA tag split
+alignments) support a large indel or SV, or mismatch/clipping suggests
+a possible breakend location.
+
+Manta requires input sequencing reads to be mapped by an external tool
+and provided as input in either BAM or CRAM format. Each input file must be
+coordinate sorted and indexed to produce a `samtools/htslib`-style index in a
+file named to match the input BAM or CRAM file with an additional '.bai', '.crai'
+or '.csi' filename extension.
+
+At configuration time, at least one BAM or CRAM file must be provided for the
+normal or tumor sample. A matched tumor-normal sample pair can be
+provided as well. If multiple input files are provided for the normal
+sample, each file will be treated as a separate sample as part of a
+joint diploid sample analysis.
+
+The following limitations exist on the input BAM or CRAM files provided to
+Manta:
+
+* Alignments cannot contain the "=" character in the SEQ field.
+* Alignments cannot use the sequence match/mismatch ("="/"X") CIGAR notation
+* RG (read group) tags in the alignment records are ignored -- each file will be
+treated as representing one sample.
+* Alignments with basecall quality values greater than 70 are rejected (these
+  are not supported on the assumption that this indicates an offset error)
+
+Manta also requires a reference sequence in fasta format. This must be
+the same reference used for mapping the input alignment files. The reference
+must include a `samtools/htslib`-style index in a file named to match the
+input fasta with an additional '.fai' file extension.
+
+
+## Outputs
+
+### Structural Variant predictions
+
+The primary Manta outputs are a set of [VCF 4.1][1] files, found in
+`${MANTA_ANALYSIS_PATH}/results/variants`. Currently there are 3 VCF
+files created for a germline analysis, and an additional somatic VCF
+is produced for a tumor/normal subtraction. These files are:
+
+* __diploidSV.vcf.gz__
+    * SVs and indels scored and genotyped under a diploid model for the set of
+  samples in a joint diploid sample analysis or for the normal sample in a
+  tumor/normal subtraction analysis. In the case of a tumor/normal subtraction,
+  the scores in this file do not reflect any information from the tumor sample.
+* __somaticSV.vcf.gz__
+    * SVs and indels scored under a somatic variant model. This file
+  will only be produced if a tumor sample alignment file is supplied during
+  configuration
+* __candidateSV.vcf.gz__
+    * Unscored SV and indel candidates. Only a minimal amount of supporting
+  evidence is required for an SV to be entered as a candidate in this file.
+  An SV or indel must be a candidate to be considered for scoring, therefore
+  an SV cannot appear in the other VCF outputs if it is not present in this
+  file. Note that by default this file includes indels down to a very small
+  size (>= 8 bases). These are intended to be passed on to a small variant
+  caller without scoring by Manta itself (by default Manta scoring starts
+  at size 51).
+* __candidateSmallIndels.vcf.gz__
+    * Subset of the candidateSV.vcf.gz file containing only simple insertion and
+  deletion variants of size 50 or less. Passing this file to a small variant caller
+  like strelka or starling (Isaac Variant Caller) will provide continuous
+  coverage over all indel sizes when the small variant caller and Manta outputs are
+  evaluated together. Alternate small indel candidate sets can be parsed out of the
+  candidateSV.vcf.gz file if this candidate set is not appropriate.
+
+For tumor-only analysis, Manta will produce an additional VCF:
+
+* __tumorSV.vcf.gz__
+    * Unscored SV and indel candidates (same content as the __candidateSV.vcf.gz__ above),
+  but including additional details: (1) paired and split read supporting evidence counts
+  for each allele (2) a subset of the filters from the scored tumor-normal model
+  are applied to the single tumor case to improve precision.
+
+### Manta VCF reporting format
+
+Manta VCF output follows the VCF 4.1 spec for describing structural
+variants. It uses standard field names wherever possible. All custom
+fields are described in the VCF header.  The section below highlights
+some of the variant representation details and lists the primary VCF
+field values.
+
+#### VCF Sample Names
+
+Sample names printed into the VCF output are extracted from each input
+alignment file from the first read group ('@RG') record found in the
+header. Any spaces found in the name will be replaced with
+underscores. If no sample name is found a default SAMPLE1, SAMPLE2,
+etc. label will be used instead.
+
+#### Small indels
+
+All variants are reported in the VCF using symbolic alleles unless
+they are classified as a small indel, in which case full sequences are
+provided for the VCF `REF` and `ALT` allele fields. A variant is
+classified as a small indel if all of these criteria are met:
+
+* The variant can be entirely expressed as a combination of inserted and deleted sequence.
+* The deletion or insertion length is less than 1000.
+* The variant breakends and/or the inserted sequence are not imprecise.
+
+When VCF records are printed in the small indel format, they will also
+include the `CIGAR` INFO tag describing the combined insertion and
+deletion event.
+
+#### Insertions with incomplete insert sequence assembly
+
+Large insertions are reported in some cases even when the insert
+sequence cannot be fully assembled.  In this case Manta reports the
+insertion using the `<INS>` symbolic allele and includes the special
+INFO fields `LEFT_SVINSSEQ` and `RIGHT_SVINSSEQ` to describe the
+assembled left and right ends of the insert sequence. The following is
+an example of such a record from the joint diploid analysis of
+NA12878, NA12891 and NA12892 mapped to hg19:
+
+```
+chr1    11830208        MantaINS:1577:0:0:0:3:0 T       <INS>   999     PASS    END=11830208;SVTYPE=INS;CIPOS=0,12;CIEND=0,12;HOMLEN=12;HOMSEQ=TAAATTTTTCTT;LEFT_SVINSSEQ=TAAATTTTTCTTTTTTCTTTTTTTTTTAAATTTATTTTTTTATTGATAATTCTTGGGTGTTTCTCACAGAGGGGGATTTGGCAGGGTCACGGGACAACAGTGGAGGGAAGGTCAGCAGACAAACAAGTGAACAAAGGTCTCTGGTTTTCCCAGGCAGAGGACCCTGCGGCCTTCCGCAGTGTTCGTGTCCCTGATTACCTGAGATTAGGGATTTGTGATGACTCCCAACGAGCATGCTGCCTTCAAGCATCTGTTCAACAAAGCACATCTTGCACTGCCCTTAATTCATTTAACCCCGAGTGGACACAGCACATGTTTCAAA [...]
+```
+
+#### VCF INFO Fields
+
+ID | Description
+--- | ---
+IMPRECISE | Flag indicating that the structural variation is imprecise, i.e. the exact breakpoint location is not found
+SVTYPE | Type of structural variant
+SVLEN | Difference in length between REF and ALT alleles
+END | End position of the variant described in this record
+CIPOS | Confidence interval around POS
+CIEND | Confidence interval around END
+CIGAR | CIGAR alignment for each alternate indel allele
+MATEID | ID of mate breakend
+EVENT | ID of event associated to breakend
+HOMLEN | Length of base pair identical homology at event breakpoints
+HOMSEQ | Sequence of base pair identical homology at event breakpoints
+SVINSLEN | Length of insertion
+SVINSSEQ | Sequence of insertion
+LEFT_SVINSSEQ | Known left side of insertion for an insertion of unknown length
+RIGHT_SVINSSEQ | Known right side of insertion for an insertion of unknown length
+INV3 | Flag indicating that inversion breakends open 3' of reported location
+INV5 | Flag indicating that inversion breakends open 5' of reported location
+BND_DEPTH | Read depth at local translocation breakend
+MATE_BND_DEPTH | Read depth at remote translocation mate breakend
+JUNCTION_QUAL | If the SV junction is part of an EVENT (i.e. a multi-adjacency variant), this field provides the QUAL value for the adjacency in question only
+SOMATIC | Flag indicating a somatic variant
+SOMATICSCORE | Somatic variant quality score
+JUNCTION_SOMATICSCORE | If the SV junction is part of an EVENT (i.e. a multi-adjacency variant), this field provides the SOMATICSCORE value for the adjacency in question only
+
+#### VCF FORMAT Fields
+
+ID | Description
+--- | ---
+GT | Genotype
+FT | Sample filter, 'PASS' indicates that all filters have passed for this sample
+GQ | Genotype Quality
+PL | Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification
+PR | Number of spanning read pairs which strongly (Q30) support the REF or ALT alleles
+SR | Number of split-reads which strongly (Q30) support the REF or ALT alleles
+
+#### VCF FILTER Fields
+
+ID | Description
+--- | ---
+MinQUAL | QUAL score is less than 20
+MinGQ | GQ score is less than 15 (filter applied at sample level and record level if all samples are filtered)
+MinSomaticScore | SOMATICSCORE is less than 30
+Ploidy | For DEL & DUP variants, the genotypes of overlapping variants (with similar size) are inconsistent with diploid expectation
+MaxDepth | Depth is greater than 3x the median chromosome depth near one or both variant breakends
+MaxMQ0Frac | For a small variant (<1000 bases), the fraction of reads in all samples with MAPQ0 around either breakend exceeds 0.4
+NoPairSupport | For variants significantly larger than the paired read fragment size, no paired reads support the alternate allele in any sample
+
+#### What do the values in Manta's VCF ID field mean?
+
+The VCF ID or 'identifier' field can be used for annotation, or in the case of BND ('breakend') records for translocations, the ID value is used to link breakend mates or partners.
+
+An example Manta VCF ID is "MantaINS:1577:0:0:0:3:0". The value provided in this field reflects the SV association graph edge(s) from which the SV or indel was discovered. The ID value provided by Manta is primarily intended for internal use by manta developers. The value is guaranteed to be unique within any VCF file produced by Manta, and these ID values are used to link associated breakend records using the standard VCF `MATEID` key. The structure of this ID may change in the future,  [...]
+
+The exact meaning of the ID field for the current Manta version is described in the [following section](../developerGuide/ID.md) of the [Manta developer guide](../developerGuide/README.md).
+
+
+#### Converting Manta VCF to BEDPE format
+
+It can sometimes be convenient to express structural variants in BEDPE
+format. For such applications we recommend the script `vcfToBedpe`
+available from:
+
+https://github.com/ctsa/svtools
+
+This repository is forked from @hall-lab with edits to support VCF 4.1
+SV format and match Manta's portability constraints.
+
+Note that BEDPE format greatly reduces structural variant information
+compared to Manta's VCF output. In particular breakend orientation,
+breakend homology and insertion sequence are lost, in addition to the
+ability to define fields for locus and sample specific
+information. For this reason we only recommend BEDPE as a temporary
+intermediate output for applications which require it.
+
+
+### Statistics
+
+Additional secondary output is provided in `${MANTA_ANALYSIS_PATH}/results/stats`
+
+* __alignmentStatsSummary.txt__
+    * fragment length quantiles for each input alignment file
+* __svLocusGraphStats.tsv__
+    * statistics and runtime information pertaining to the SV locus graph
+* __svCandidateGenerationStats.tsv__
+    * statistics and runtime information pertaining to the SV candidate generation
+* __svCandidateGenerationStats.xml__
+    * xml data backing the svCandidateGenerationStats.tsv report
+
+
+## Runtime hardware requirements
+
+Manta workflows are parallelized at the process level using the
+[pyFlow] [pyflow_site] task manager. pyFlow can distribute Manta
+workflows to a specified number of cores on a single host or
+SGE-managed cluster.
+
+As a useful runtime benchmark, [Platinum Genomes] [PG] sequencing
+reads for NA12878 at 50x coverage (whole genome) can be analyzed in
+less than 20 minutes on 20 physical cores using a dual Xeon E5-2680
+v2 server with the BAM accessed from a conventional local
+drive. Peak total memory (RSS) for this run was 2.35 Gb.
+Additional hardware notes:
+
+* **Memory** Typical memory requirements are <1Gb/core for germline
+analysis and <2Gb/core for cancer/FFPE/highly rearranged
+samples. The exact requirement depends on many factors including
+sequencing depth, read length, fragment size and sample quality.
+
+* **CPU** Manta does not require or benefit from any specific modern
+CPU feature (e.g. NUMA, AVX..), but in general faster clock and
+larger caches will improve performance.
+
+* **I/O** I/O can be roughly approximated as 1.1 reads of the
+input alignment file per analysis, with no writes that are significant
+relative to the alignment file size.
+
+[pyflow_site]:http://illumina.github.io/pyflow
+[PG]:http://www.platinumgenomes.org
+
+
+## Run configuration and execution
+
+Manta is run in a two step procedure: (1) configuration and (2)
+workflow execution. The configuration step is used to specify the
+input data and any options pertaining to the variant calling methods
+themselves. The execution step is used to specify any parameters
+pertaining to _how_ manta is executed (such as the total number of
+cores or SGE nodes over which the jobs should be parallelized). The
+second execution step can also be interrupted and restarted without
+changing the final result of the workflow.
+
+### Configuration
+
+The workflow is configured with the script:
+`${MANTA_INSTALL_PATH}/bin/configManta.py` . Running this script with
+no arguments will display all standard configuration options to specify input alignment files, the reference sequence and the output run
+folder. Note that all input alignment (BAM or CRAM) files and reference sequence must
+contain the same chromosome names in the same order. In addition all
+input alignment files and reference sequences must be indexed with
+`samtools` (or a utility which creates equivalent index
+files). Manta's default settings assume a whole genome DNA-Seq
+analysis, but there are configuration options for exome/targeted
+sequencing analysis in addition to RNA-Seq.
+
+Single Diploid Sample Analysis -- Example Configuration:
+
+```
+${MANTA_INSTALL_PATH}/bin/configManta.py \
+--bam NA12878_S1.bam \
+--referenceFasta hg19.fa \
+--runDir ${MANTA_ANALYSIS_PATH}
+```
+
+Joint Diploid Sample Analysis -- Example Configuration:
+
+```
+${MANTA_INSTALL_PATH}/bin/configManta.py \
+--bam NA12878_S1.cram \
+--bam NA12891_S1.cram \
+--bam NA12892_S1.cram \
+--referenceFasta hg19.fa \
+--runDir ${MANTA_ANALYSIS_PATH}
+```
+
+Tumor Normal Analysis -- Example Configuration:
+
+```
+${MANTA_INSTALL_PATH}/bin/configManta.py \
+--normalBam HCC1187BL.cram \
+--tumorBam HCC1187C.cram \
+--referenceFasta hg19.fa \
+--runDir ${MANTA_ANALYSIS_PATH}
+```
+
+Tumor-Only Analysis -- Example Configuration:
+
+```
+${MANTA_INSTALL_PATH}/bin/configManta.py \
+--tumorBam HCC1187C.cram \
+--referenceFasta hg19.fa \
+--runDir ${MANTA_ANALYSIS_PATH}
+```
+
+On completion, the configuration script will create the workflow run
+script `${MANTA_ANALYSIS_PATH}/runWorkflow.py`. This can be used to
+run the workflow in various parallel compute modes per the
+instructions in the [Execution](#execution) section below.
+
+#### Advanced configuration options
+
+There are two sources of advanced configuration options:
+
+* Options listed in the file: `${MANTA_INSTALL_PATH}/bin/configManta.py.ini`
+    * These parameters are not expected to change frequently. Changing the file
+  listed above will re-configure all manta runs for the installation. To change
+  parameters for a single run, copy the configManta.py.ini file to another location,
+  change the desired parameter values and supply the new file using the configuration
+  script's `--config FILE` option.
+* Advanced options listed in: `${MANTA_INSTALL_PATH}/bin/configManta.py --allHelp`
+    * These options are intended primarily for workflow development and
+  debugging, but could be useful for runtime optimization in some specialized
+  cases.
+
+### Execution
+
+The configuration step creates a new workflow run script in the
+requested run directory:
+
+`${MANTA_ANALYSIS_PATH}/runWorkflow.py`
+
+This script is used to control parallel execution of Manta via the
+[pyFlow][2] task engine. It can be used to parallelize structural
+variant analysis via one of two modes:
+
+1. Parallelized across multiple cores on a single node.
+2. Parallelized across multiple nodes on an SGE cluster.
+
+A running workflow can be interrupted at any time and resumed where it
+left off. If desired, the resumed analysis can use a different running
+mode or total core count.
+
+For a full list of execution options, see:
+
+`${MANTA_ANALYSIS_PATH}/runWorkflow.py -h`
+
+Example execution on a single node:
+
+```
+${MANTA_ANALYSIS_PATH}/runWorkflow.py -m local -j 8
+```
+
+Example execution on an SGE cluster:
+
+```
+${MANTA_ANALYSIS_PATH}/runWorkflow.py -m sge -j 36
+```
+
+#### Advanced execution options
+
+These options are useful for Manta development and debugging:
+
+* Stderr logging can be disabled with the `--quiet` argument. Note this
+  log is replicated to
+  `${MANTA_ANALYSIS_PATH}/workspace/pyflow.data/logs/pyflow_log.txt`
+  so there is no loss of log information.
+* The `--rescore` option can be provided to force the workflow to
+  re-execute candidate discovery and scoring, but not the initial
+  graph generation steps.
+* The `--generateEvidenceBam` option can be used to generate bam files
+  of evidence reads for SVs listed in the candidate vcf file.
+  (More details in the section "Generating evidence bams" below)
+
+### Extended use cases
+
+#### Exome/Targeted
+
+Supplying the '--exome' flag at configuration time will provide
+appropriate settings for WES and other regional enrichment
+analyses. At present this flag disables all high depth filters, which
+are designed to exclude pericentromeric reference compressions in the
+WGS case but cannot be applied correctly to a targeted analysis.
+
+For small targeted regions, it may also be helpful to consider the
+high sensitivity calling documentation below.
+
+#### Unpaired tumor sample
+
+Manta supports SV calling for tumor sample only. The tumor-only mode
+can be triggered by supplying a tumor sample alignment file but no alignments for the normal sample.
+The results are reported in __tumorSV.vcf.gz__. This file contains all
+SV candidates (similar to the __candidateSV.vcf.gz__ file), but also
+includes paired and split read evidence for each allele and a
+subset of the filters used for the tumor-normal comparative analysis.
+Note that Manta does not yet provide a quality scoring model for unpaired
+tumor sample analysis.
+
+For low allele frequency variants, it may also be helpful to consider the
+high sensitivity calling documentation below.
+
+### RNA-Seq
+
+Supplying the '--rna' flag at configuration time will provide
+experimental settings for RNA-Seq Fusion calling. At present this flag
+disables all high depth filters which are designed to exclude
+pericentromeric reference compressions in the WGS case but cannot be
+applied correctly to RNA-Seq analysis.  In addition many custom RNA
+read processing and alignment steps are invoked. This mode is designed
+to function as part of a larger workflow with additional steps to reduce
+overall false positive rate which take place downstream from Manta's
+fusion calling step.
+
+It may also be helpful to consider the high sensitivity calling
+documentation below for this mode.
+
+### High sensitivity calling
+
+Manta is configured with a discovery sensitivity appropriate for
+general WGS applications.  In targeted or other specialized contexts
+the candidate sensitivity can be increased. A recommended general high
+sensitivity mode can be obtained by changing the two values
+'minEdgeObservations' and 'minCandidateSpanningCount' in the manta
+configuration file (see 'Advanced configuration options' above) to 2
+observations per candidate (the default is 3):
+
+```
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but
+# evidence is sometimes downweighted.
+minEdgeObservations = 2
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 2
+```
+
+### De novo calling
+
+Manta can be used for de novo calling, following a two-step procedure:
+
+1) Manta can take multiple input bams for the normal
+sample, each bam file being treated as a separate sample as part of a
+joint diploid sample analysis, and then output a multi-sample vcf, where the sample order follows that of the input bams.
+
+2) A post-processing script, provided as ${MANTA_INSTALL_PATH}/libexec/denovo_scoring.py, can be applied to the multi-sample vcf to detect SVs that have inheritance conflicts among a trio sample set.
+
+   The script usage is
+   denovo_scoring.py <vcf file> <proband sample ID> <father sample ID> <mother sample ID>
+   It will ignore any samples in the vcf that are not specified at the commandline.
+
+   Under the same folder of the input vcf, the script outputs a new vcf file and a text file of stats for the de novo calls. Currently, all SVs with inheritance conflicts are labeled with "DQ=60" inside the INFO, while all SVs without any conflict are labeled with "DQ=0".
+
+
+### Generating evidence bams
+
+Using the `--generateEvidenceBam` option, Manta can be configured to generate bam files of evidence reads for SVs listed in the candidate vcf file.
+
+It is recommended to use this option together with the `--region` option, so that the analysis is limited to relatively small genomic regions for debugging purposes.
+
+The evidence bam files are provided in `${MANTA_ANALYSIS_PATH}/results/evidence`, with a naming format `evidence.*.bam`.
+There is one such file for each input bam of the analysis, containing evidence reads of the candidate SVs identified from that input bam.
+Each read in an evidence bam keeps all information from the original bam, and it contains also a customized tag in the format: `ZM:Z:${MANTA_SV_ID_1}|${EVIDENCE_TYPE},${MANTA_SV_ID_2}|${EVIDENCE_TYPE}`. For example, ZM:Z:MantaINV:5:0:1:0:0:0|PR|SRM,MantaDEL:5:1:2:0:0:0|SR
+* One read can have more than one of the three evidence types: PR for paired reads, SR for split reads, and SRM for split read mates.
+* One read can be evidence for multiple SVs, which are separated by commas in the tag.
+
+Notice that the number of evidence reads for a particular SV in the evidence bam files could be more than the evidence counts (PR and SR) in the final vcf files. This is because more stringent criteria are applied for generating evidence counts in the final vcf files.
+
+
+[1]: http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
+[2]: http://sequencing.github.io/pyflow/
+
diff --git a/docs/userGuide/installation.md b/docs/userGuide/installation.md
new file mode 100644
index 0000000..6df670a
--- /dev/null
+++ b/docs/userGuide/installation.md
@@ -0,0 +1,166 @@
+Manta User Guide - Installation
+===============================
+
+[User Guide Home](README.md)
+
+## Table of Contents
+[] (BEGIN automated TOC section, any edits will be overwritten on next source refresh)
+* [Prerequisites to build from source](#prerequisites-to-build-from-source)
+* [Runtime prerequisites](#runtime-prerequisites)
+* [Operating System Guidelines](#operating-system-guidelines)
+    * [Linux](#linux)
+    * [OS X](#os-x)
+    * [Windows](#windows)
+* [Linux Package Additions](#linux-package-additions)
+    * [Ubuntu 14.04 and 16.04](#ubuntu-1404-and-1604)
+    * [Ubuntu 12.04](#ubuntu-1204)
+    * [CentOS 7](#centos-7)
+    * [CentOS 5 and 6](#centos-5-and-6)
+* [Build procedure](#build-procedure)
+    * [Workflow relocation](#workflow-relocation)
+* [Demo](#demo)
+[] (END automated TOC section, any edits will be overwritten on next source refresh)
+
+It is recommended to start from one of the [binary distributions on
+the Manta releases page] [releases] if a suitable version is available
+(note that the CentOS 5 binary distribution is expected to support a
+large variety of linux systems).  If building from source start from
+the release distributions of the source code, also provided on the
+[Manta releases page] [releases]. Cloning/archiving the source
+directly from git could result in missing version number entries,
+undesirably stringent build requirements or an unstable development
+version between releases. Additional build notes for Manta developers can
+be found in the [manta developer guide] [developerGuide].
+
+[releases]:https://github.com/Illumina/manta/releases
+[DeveloperGuide]:../developerGuide/README.md
+
+
+### Prerequisites to build from source
+
+[![Build Status] [tcistatus]] [tcihome]
+
+[tcistatus]:https://travis-ci.org/Illumina/manta.svg?branch=master
+[tcihome]:https://travis-ci.org/Illumina/manta
+
+Manta requires a compiler supporting most of the C++11 standard. These
+are the current minimum versions enforced by the build system:
+
+* python 2.4+
+* gcc 4.8+ OR clang 3.2+ (OR Visual Studio 2013+, see windows note below)
+
+### Runtime prerequisites
+
+* python 2.4+
+
+### Operating System Guidelines
+
+##### Linux
+
+Manta is known to build and run on the following linux distributions
+(with additional packages as described below):
+
+- Ubuntu 12.04, 14.04, 16.04
+- CentOS 5, 6, 7
+
+##### OS X
+
+Manta builds and passes basic tests on OS X 10.9, but full WGS analyses
+are not tested for this platform.
+
+##### Windows
+
+Manta does not build or run on windows. Library-level compilation is
+possible for Visual Studio users. See the [manta developer guide] [DeveloperGuide] for details.
+
+### Linux Package Additions
+
+##### Ubuntu 14.04 and 16.04
+
+    apt-get update -qq
+    apt-get install -qq bzip2 gcc g++ make python zlib1g-dev
+
+##### Ubuntu 12.04
+
+    apt-get update -qq
+    apt-get install -qq bzip2 gcc g++ make python python-software-properties zlib1g-dev
+    # add gcc 4.8 from ubuntu ppa:
+    add-apt-repository -y ppa:ubuntu-toolchain-r/test
+    apt-get update -qq
+    apt-get install -qq gcc-4.8 g++-4.8
+
+    # Prior to build configuration, set CC/CXX to gcc 4.8:
+    export CC=/usr/bin/gcc-4.8
+    export CXX=/usr/bin/g++-4.8
+
+##### CentOS 7
+
+    yum install -y tar bzip2 make gcc gcc-c++ libstdc++-static zlib-devel
+
+##### CentOS 5 and 6
+
+    yum install -y tar wget bzip2 make gcc gcc-c++ zlib-devel
+    # add gcc 4.8 from developer tools v2:
+    wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo
+    yum install -y devtoolset-2-gcc devtoolset-2-gcc-c++ devtoolset-2-binutils
+
+    # Prior to build configuration, set CC/CXX to gcc 4.8:
+    export CC=/opt/rh/devtoolset-2/root/usr/bin/gcc
+    export CXX=/opt/rh/devtoolset-2/root/usr/bin/g++
+
+### Build procedure
+
+After acquiring a release distribution of the source code, the build
+procedure is:
+
+* Unpack source code
+* Create and move to a separate `build` directory (out-of-source build is required.)
+* Configure build
+* Compile & Install
+
+Example (building on 4 cores):
+
+    wget https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.release_src.tar.bz2
+    tar -xjf manta-${MANTA_VERSION}.release_src.tar.bz2
+    mkdir build && cd build
+    # Ensure that CC and CXX are updated to target compiler if needed, e.g.:
+    #     export CC=/path/to/cc
+    #     export CXX=/path/to/c++
+    ../manta-${MANTA_VERSION}.release_src/configure --jobs=4 --prefix=/path/to/install
+    make -j4 install
+
+Note that during the configuration step, the following dependencies
+will be built from source if they are not found:
+
+* cmake 2.8.0+
+* boost 1.56.0+
+
+To accelerate this process the configuration step can be parallelized
+over multiple cores, as demonstrated in the example above with the
+`--jobs=4` argument to configure.
+
+To see more configure options, run:
+
+    ${MANTA_SRC_PATH}/configure --help
+
+##### Workflow relocation
+
+After Manta is built the installation directory can be relocated to
+another directory.  All internal paths used in the workflow are
+relative.
+
+### Demo
+
+To help verify a successful installation, Manta includes a small demo
+data set and test script. After completing the installation steps
+above, the demo can be run as follows:
+
+    python ${MANTA_INSTALL_PATH}/bin/runMantaWorkflowDemo.py
+
+This script creates a `MantaDemoAnalysis` directory under the current
+working directory, runs Manta on a small demo dataset, and compares the
+somatic structural variant output to an expected result.
+
+See [the demo README](../../src/demo/README.md) for additional information
+on the test script and data.
+
diff --git a/redist/CMakeLists.txt b/redist/CMakeLists.txt
new file mode 100644
index 0000000..c29d778
--- /dev/null
+++ b/redist/CMakeLists.txt
@@ -0,0 +1,216 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for external tools
+##
+## author Come Raczy
+##
+################################################################################
+
+message(STATUS "Building external tools")
+
+include ("${THIS_GLOBALS_CMAKE}")
+include ("${THIS_MACROS_CMAKE}")
+
+# convenience macro: set `symbol` to `value` in both the current scope and its parent scope:
+macro(superset symbol value)
+    set(${symbol} "${value}")
+    set(${symbol} "${value}" PARENT_SCOPE)
+endmacro()
+
+
+set (THIS_CMAKE_PLATFORM "")
+if (WIN32)
+    # Sometimes the platform (x64,win32...) is specified as part of the generator name;
+    # this captures the cases where it is specified separately via CMAKE_GENERATOR_PLATFORM:
+    if (CMAKE_GENERATOR_PLATFORM)
+        set (THIS_CMAKE_PLATFORM -A "${CMAKE_GENERATOR_PLATFORM}")  # forwarded to the nested cmake call that configures zlib
+    endif ()
+endif ()
+
+
+#
+# zlib
+#
+set(ZLIB_PREFIX "zlib-1.2.8")
+superset(ZLIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ZLIB_PREFIX}")
+if (WIN32)
+    superset(ZLIB_LIBRARY "${ZLIB_DIR}/${CMAKE_BUILD_TYPE}/zlibstatic.lib")
+else ()
+    superset(ZLIB_LIBRARY "${ZLIB_DIR}/libz.a")
+endif ()
+
+add_custom_command(
+	OUTPUT ${ZLIB_DIR}
+	COMMAND ${CMAKE_COMMAND} -E remove_directory "${ZLIB_DIR}"
+	COMMAND ${CMAKE_COMMAND} -E tar xjf "${THIS_REDIST_DIR}/${ZLIB_PREFIX}.tar.bz2"
+	COMMENT "Unpacking zlib package")
+
+if (NOT WIN32)  # non-windows hosts: build with zlib's own configure script and make
+    set (ASM_MAKE_OPTION LOC=-DASMV OBJA=match.o)  # make variables passed to the zlib build below; NOTE(review): presumably select the assembler match routine - confirm against zlib's Makefile
+    if (THIS_ARCH MATCHES "^.*86$")  # architecture name ending in 86
+        set(ZLIB_ARCH "")
+        set(ASM_FILE_COPY cp contrib/asm686/match.S match.S)
+    elseif (THIS_ARCH MATCHES "^x86_64$")
+        set(ZLIB_ARCH "--64")
+        set(ASM_FILE_COPY cp contrib/amd64/amd64-match.S match.S)
+    else ()  # any other architecture: no assembler source is copied and no extra make options are passed
+        set (ZLIB_ARCH "")
+        set (ASM_FILE_COPY "")
+        set (ASM_MAKE_OPTION "")
+    endif ()
+
+    add_custom_command(
+        OUTPUT ${ZLIB_LIBRARY}
+        COMMAND CC=${CMAKE_C_COMPILER} ${ZLIB_DIR}/configure --prefix="${ZLIB_DIR}" --static ${ZLIB_ARCH} >zlib.config.log
+        COMMAND ${ASM_FILE_COPY}
+        COMMAND $(MAKE) ${ASM_MAKE_OPTION} >zlib.build.log 2>zlib.build.error.log
+        WORKING_DIRECTORY ${ZLIB_DIR}
+        DEPENDS ${ZLIB_DIR}
+        COMMENT "Building zlib package")
+else ()  # windows: configure zlib with cmake, then build the generated zlib.sln
+    add_custom_command(
+        OUTPUT ${ZLIB_LIBRARY}
+        COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" ${THIS_CMAKE_PLATFORM} -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}" ${ZLIB_DIR} >zlib.config.log
+        COMMAND ${CMAKE_MAKE_PROGRAM}  /p:Configuration=${CMAKE_BUILD_TYPE} "${ZLIB_DIR}/zlib.sln" >zlib.build.log
+        WORKING_DIRECTORY ${ZLIB_DIR}
+        DEPENDS ${ZLIB_DIR}
+        COMMENT "Building zlib package")
+endif ()
+
+
+set(THIS_ZLIB "${THIS_PROJECT_NAME}_zlib")
+add_custom_target(${THIS_ZLIB} DEPENDS "${ZLIB_LIBRARY}")
+
+
+#
+# htslib
+#
+set(HTSLIB_PREFIX "htslib-1.2.1-204-g8197cfd")
+superset(HTSLIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/${HTSLIB_PREFIX}")
+superset(HTSLIB_LIBRARY "${HTSLIB_DIR}/libhts.a")
+
+add_custom_command(
+    OUTPUT ${HTSLIB_DIR}
+    COMMAND ${CMAKE_COMMAND} -E remove_directory "${HTSLIB_DIR}"
+    COMMAND ${CMAKE_COMMAND} -E tar xjf "${THIS_REDIST_DIR}/${HTSLIB_PREFIX}.tar.bz2"
+    DEPENDS ${THIS_ZLIB}
+    COMMENT "Unpacking htslib library")
+
+set (HTSLIB_FINAL_TASK ${HTSLIB_DIR})  # default, kept on windows: the htslib target only requires the unpacked source directory
+
+if (NOT WIN32)
+    # note that htslib ./configure CFLAGS="custom" will overwrite the standard "-g -O2 -Wall" CFLAGS, so
+    # we need to restore at least the -O2 for reasonable library performance:
+    add_custom_command(
+        OUTPUT ${HTSLIB_LIBRARY}
+        COMMAND ./configure CC="${CMAKE_C_COMPILER}" CFLAGS='-O2 -I"${ZLIB_DIR}"' LIBS="${ZLIB_LIBRARY}" >htslib.config.log
+        COMMAND $(MAKE) lib-static bgzip htsfile tabix >htslib.build.log 2>htslib.build.error.log
+        WORKING_DIRECTORY ${HTSLIB_DIR}
+        DEPENDS ${HTSLIB_DIR}
+        COMMENT "Building htslib library")
+
+    set (HTSLIB_FINAL_TASK ${HTSLIB_LIBRARY})  # non-windows: the htslib target requires the built static library
+endif ()
+
+set(THIS_HTSLIB "${THIS_PROJECT_NAME}_htslib")
+add_custom_target(${THIS_HTSLIB} DEPENDS "${HTSLIB_FINAL_TASK}")
+
+
+if (NOT WIN32)
+    install(PROGRAMS "${HTSLIB_DIR}/bgzip" DESTINATION "${THIS_LIBEXECDIR}")
+    install(PROGRAMS "${HTSLIB_DIR}/htsfile" DESTINATION "${THIS_LIBEXECDIR}")
+    install(PROGRAMS "${HTSLIB_DIR}/tabix" DESTINATION "${THIS_LIBEXECDIR}")
+endif ()
+
+#
+# samtools
+#
+set(SAMTOOLS_PREFIX "samtools-1.2")
+set(SAMTOOLS_DIR "${CMAKE_CURRENT_BINARY_DIR}/${SAMTOOLS_PREFIX}")
+set(SAMTOOLS_LIBRARY "${SAMTOOLS_DIR}/libbam.a")
+superset(SAMTOOLS_PROG "${SAMTOOLS_DIR}/samtools")
+
+# the final copy_directory step below would ideally be a soft-link; a full copy is used for the windows build
+add_custom_command(
+    OUTPUT ${SAMTOOLS_DIR}
+    COMMAND ${CMAKE_COMMAND} -E remove_directory "${SAMTOOLS_DIR}"
+    COMMAND ${CMAKE_COMMAND} -E tar xjf "${THIS_REDIST_DIR}/${SAMTOOLS_PREFIX}.tar.bz2"
+    COMMAND ${CMAKE_COMMAND} -E copy_directory "${HTSLIB_DIR}" "${SAMTOOLS_DIR}/${HTSLIB_PREFIX}"
+    DEPENDS ${HTSLIB_FINAL_TASK}
+    COMMENT "Unpacking samtools package")
+
+set (SAMTOOLS_FINAL_TASK ${SAMTOOLS_DIR})
+
+if (NOT WIN32)
+    add_custom_command(
+        OUTPUT ${SAMTOOLS_PROG}
+        COMMAND $(MAKE) HTSDIR=${HTSLIB_PREFIX} all 2>| samtools.build.log
+        DEPENDS ${HTSLIB_LIBRARY}
+        DEPENDS ${SAMTOOLS_DIR}
+        WORKING_DIRECTORY ${SAMTOOLS_DIR}
+        COMMENT "Building samtools package")
+
+    set (SAMTOOLS_FINAL_TASK ${SAMTOOLS_PROG})
+endif ()
+
+set(THIS_SAMTOOLS "${THIS_PROJECT_NAME}_samtools")
+add_custom_target(${THIS_SAMTOOLS} DEPENDS "${SAMTOOLS_FINAL_TASK}")
+
+if (NOT WIN32)
+    install(PROGRAMS "${SAMTOOLS_PROG}" DESTINATION "${THIS_LIBEXECDIR}")
+endif ()
+
+#
+# pyflow
+#
+
+set(PYFLOW_PREFIX "pyflow-1.1.12")
+set(PYFLOW_DIR "${CMAKE_CURRENT_BINARY_DIR}/${PYFLOW_PREFIX}")
+set(PYFLOW_SCRIPT "${PYFLOW_DIR}/src/pyflow.py")
+
+set (PYFLOW_DEPENDS "${PYFLOW_SCRIPT}")
+add_custom_command(
+    OUTPUT ${PYFLOW_SCRIPT}
+    COMMAND ${CMAKE_COMMAND} -E remove_directory "${PYFLOW_DIR}"
+    COMMAND ${CMAKE_COMMAND} -E tar xjf "${THIS_REDIST_DIR}/${PYFLOW_PREFIX}.tar.bz2"
+    COMMAND ${CMAKE_COMMAND} -E remove -f "${PYFLOW_DIR}/src/__init__.py"
+    COMMENT "Unpacking pyflow")
+
+if (PYTHONINTERP_FOUND)  # byte-compile the unpacked pyflow sources when a python interpreter was found
+    set (PYFLOW_DEPENDS "${PYFLOW_SCRIPT}c")  # depend on the compiled pyflow.pyc instead of pyflow.py
+    add_custom_command(
+        OUTPUT ${PYFLOW_SCRIPT}c
+        COMMAND ${PYTHON_EXECUTABLE} -m compileall -q "${PYFLOW_DIR}/src"
+        DEPENDS ${PYFLOW_SCRIPT}
+        COMMENT "Building pyflow")
+endif()
+
+install_python_lib_dir("${PYFLOW_DIR}/src" "${THIS_PYTHON_LIBDIR}/pyflow")
+
+set (THIS_PYFLOW "${THIS_PROJECT_NAME}_pyflow")
+add_custom_target(${THIS_PYFLOW} DEPENDS "${PYFLOW_DEPENDS}")
+
+
+# tie results back to parent:
+#
+add_dependencies(${THIS_OPT} ${THIS_ZLIB} ${THIS_SAMTOOLS} ${THIS_PYFLOW})
+
diff --git a/redist/README.txt b/redist/README.txt
new file mode 100644
index 0000000..c98dab6
--- /dev/null
+++ b/redist/README.txt
@@ -0,0 +1,11 @@
+3rd party package modification notes:
+
+boost has been modified to remove some files according to
+$ROOT/scratch/make_boost_subset.bash
+
+samtools and htslib have been modified to remove the test/
+directories, in addition to all test and curses requirements
+from the Makefiles.
+
+cmake-modules-c99fd3 modified to show git describe --dirty 
+
diff --git a/redist/cmake-modules-c99fd3/GetGitRevisionDescription.cmake b/redist/cmake-modules-c99fd3/GetGitRevisionDescription.cmake
new file mode 100644
index 0000000..2371709
--- /dev/null
+++ b/redist/cmake-modules-c99fd3/GetGitRevisionDescription.cmake
@@ -0,0 +1,128 @@
+# - Returns a version string from Git
+#
+# These functions force a re-configure on each git commit so that you can
+# trust the values of the variables in your build system.
+#
+#  get_git_head_revision(<refspecvar> <hashvar> <srcdir>)
+#
+# Returns the refspec and sha hash of the current head revision
+#
+#  git_describe(<var> <srcdir> [<additional arguments to git describe> ...])
+#
+# Returns the results of git describe on the source tree, and adjusting
+# the output so that it tests false if an error occurs.
+#
+#  git_get_exact_tag(<var> [<additional arguments to git describe> ...])
+#
+# Returns the results of git describe --exact-match on the source tree,
+# and adjusting the output so that it tests false if there was no exact
+# matching tag.
+#
+# Requires CMake 2.6 or newer (uses the 'function' command)
+#
+# Original Author:
+# 2009-2010 Ryan Pavlik <rpavlik at iastate.edu> <abiryan at ryand.net>
+# http://academic.cleardefinition.com
+# Iowa State University HCI Graduate Program/VRAC
+#
+# Copyright Iowa State University 2009-2010.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Include guard: stop processing this module if the guard variable has
+# already been set by an earlier include.
+if(__get_git_revision_description)
+	return()
+endif()
+set(__get_git_revision_description YES)
+
+# We must run the following at "include" time, not at function call time,
+# to find the path to this module rather than the path to a calling list file
+get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH)
+
+# csaunders: modify to take srcdir as an argument instead of using CMAKE(_CURRENT|)_SOURCE_DIR
+#
+#  get_git_head_revision(<refspecvar> <hashvar> <srcdir>)
+#
+# Locates the git metadata for <srcdir> by walking up its parent directories
+# until a ".git" entry exists, then sets in the caller's scope:
+#   <refspecvar> - HEAD_REF as set by the generated grabRef.cmake
+#   <hashvar>    - HEAD_HASH as set by the generated grabRef.cmake
+# Both are set to "GITDIR-NOTFOUND" when no ".git" entry is found before the
+# directory walk stops making progress. Neither is set when the git directory
+# contains no HEAD file.
+#
+# Path arguments are quoted throughout so that an empty value (or one that
+# contains ';') is still passed as exactly one argument instead of vanishing
+# or being split.
+function(get_git_head_revision _refspecvar _hashvar _srcdir)
+	set(GIT_PARENT_DIR "${_srcdir}")
+	set(GIT_DIR "${GIT_PARENT_DIR}/.git")
+	while(NOT EXISTS "${GIT_DIR}")	# .git dir not found, search parent directories
+		set(GIT_PREVIOUS_PARENT "${GIT_PARENT_DIR}")
+		get_filename_component(GIT_PARENT_DIR "${GIT_PARENT_DIR}" PATH)
+		if(GIT_PARENT_DIR STREQUAL GIT_PREVIOUS_PARENT)
+			# We have reached the root directory, we are not in git
+			set(${_refspecvar} "GITDIR-NOTFOUND" PARENT_SCOPE)
+			set(${_hashvar} "GITDIR-NOTFOUND" PARENT_SCOPE)
+			return()
+		endif()
+		set(GIT_DIR "${GIT_PARENT_DIR}/.git")
+	endwhile()
+	# check if this is a submodule: there ".git" is a file holding
+	# "gitdir: <path>", which is resolved against the file's directory
+	if(NOT IS_DIRECTORY "${GIT_DIR}")
+		file(READ "${GIT_DIR}" submodule)
+		string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" GIT_DIR_RELATIVE "${submodule}")
+		get_filename_component(SUBMODULE_DIR "${GIT_DIR}" PATH)
+		get_filename_component(GIT_DIR "${SUBMODULE_DIR}/${GIT_DIR_RELATIVE}" ABSOLUTE)
+	endif()
+	# scratch directory in the build tree for copies of the git metadata
+	set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data")
+	if(NOT EXISTS "${GIT_DATA}")
+		file(MAKE_DIRECTORY "${GIT_DATA}")
+	endif()
+
+	if(NOT EXISTS "${GIT_DIR}/HEAD")
+		return()
+	endif()
+	set(HEAD_FILE "${GIT_DATA}/HEAD")
+	configure_file("${GIT_DIR}/HEAD" "${HEAD_FILE}" COPYONLY)
+
+	# Generate grabRef.cmake from the template next to this module (with
+	# HEAD_FILE, GIT_DIR and GIT_DATA substituted) and run it; it sets
+	# HEAD_REF and HEAD_HASH in this scope.
+	configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in"
+		"${GIT_DATA}/grabRef.cmake"
+		@ONLY)
+	include("${GIT_DATA}/grabRef.cmake")
+
+	set(${_refspecvar} "${HEAD_REF}" PARENT_SCOPE)
+	set(${_hashvar} "${HEAD_HASH}" PARENT_SCOPE)
+endfunction()
+
+# csaunders: modify to take srcdir as an argument instead of using CMAKE(_CURRENT|)_SOURCE_DIR
+#
+#  git_describe(<var> <srcdir> [<additional arguments to git describe> ...])
+#
+# Runs "git describe" with the additional arguments inside <srcdir> and
+# stores the whitespace-stripped output in <var> in the caller's scope.
+# On failure <var> receives a value ending in "-NOTFOUND", which tests false
+# in if():
+#   "GIT-NOTFOUND"               - no git executable was found
+#   "HEAD-HASH-NOTFOUND"         - get_git_head_revision produced no hash
+#   "<output>-<result>-NOTFOUND" - git describe returned a non-zero result
+function(git_describe _var _srcdir)
+	if(NOT GIT_FOUND)
+		find_package(Git QUIET)
+	endif()
+	# Quoted so that <srcdir> is always forwarded as exactly one argument.
+	# This is called before the GIT_FOUND check, i.e. even when git is missing.
+	get_git_head_revision(refspec hash "${_srcdir}")
+	if(NOT GIT_FOUND)
+		set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
+		return()
+	endif()
+	if(NOT hash)
+		set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE)
+		return()
+	endif()
+
+	# TODO sanitize
+	#if((${ARGN}" MATCHES "&&") OR
+	#	(ARGN MATCHES "||") OR
+	#	(ARGN MATCHES "\\;"))
+	#	message("Please report the following error to the project!")
+	#	message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}")
+	#endif()
+
+	#message(STATUS "Arguments to execute_process: ${ARGN}")
+
+	# csaunders: Modified to leave off explicit ${head} specification so that the --dirty flag
+	# can be added in. This probably breaks various submodule/etc, usages, but I can't document which
+	execute_process(COMMAND
+		"${GIT_EXECUTABLE}" describe ${ARGN}
+		WORKING_DIRECTORY "${_srcdir}"
+		RESULT_VARIABLE res
+		OUTPUT_VARIABLE out
+		ERROR_QUIET
+		OUTPUT_STRIP_TRAILING_WHITESPACE)
+	if(NOT res EQUAL 0)
+		# make the value test false while keeping the output and result visible
+		set(out "${out}-${res}-NOTFOUND")
+	endif()
+
+	set(${_var} "${out}" PARENT_SCOPE)
+endfunction()
+
+#  git_get_exact_tag(<var> [<additional arguments to git describe> ...])
+#
+# Stores the result of "git describe --exact-match" in <var> in the caller's
+# scope; the value tests false when there is no exactly matching tag.
+#
+# git_describe() takes the source directory as its second argument, so one
+# must be passed explicitly here; otherwise "--exact-match" would be consumed
+# as the source directory. CMAKE_CURRENT_SOURCE_DIR is used so that this
+# function's own signature stays unchanged.
+function(git_get_exact_tag _var)
+	git_describe(out "${CMAKE_CURRENT_SOURCE_DIR}" --exact-match ${ARGN})
+	set(${_var} "${out}" PARENT_SCOPE)
+endfunction()
diff --git a/redist/cmake-modules-c99fd3/GetGitRevisionDescription.cmake.in b/redist/cmake-modules-c99fd3/GetGitRevisionDescription.cmake.in
new file mode 100644
index 0000000..888ce13
--- /dev/null
+++ b/redist/cmake-modules-c99fd3/GetGitRevisionDescription.cmake.in
@@ -0,0 +1,38 @@
+# 
+# Internal file for GetGitRevisionDescription.cmake
+#
+# Requires CMake 2.6 or newer (uses the 'function' command)
+#
+# Original Author:
+# 2009-2010 Ryan Pavlik <rpavlik at iastate.edu> <abiryan at ryand.net>
+# http://academic.cleardefinition.com
+# Iowa State University HCI Graduate Program/VRAC
+#
+# Copyright Iowa State University 2009-2010.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Clear HEAD_HASH before computing it.
+set(HEAD_HASH)
+
+# The at-sign placeholders in this template are substituted when it is
+# configured into grabRef.cmake. Read at most 1024 bytes of the copied HEAD.
+file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)
+
+string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)
+if(HEAD_CONTENTS MATCHES "ref")
+	# named branch
+	# Drop the "ref: " prefix to obtain the ref path relative to the git dir.
+	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
+	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
+		# The ref exists as a file: copy it so it can be read as the hash below.
+		configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
+	elseif(EXISTS "@GIT_DIR@/logs/${HEAD_REF}")
+		# No ref file, but a log for the ref exists: copy the log and report
+		# the ref path itself in HEAD_HASH.
+		# NOTE(review): this yields a ref path rather than a commit hash --
+		# presumably a fallback for packed refs; confirm this is intended.
+		configure_file("@GIT_DIR@/logs/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
+		set(HEAD_HASH "${HEAD_REF}")
+	endif()
+else()
+	# detached HEAD
+	# HEAD_REF is not set on this path; HEAD itself is copied as head-ref.
+	configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY)
+endif()
+
+# Unless it was set above, take HEAD_HASH from the copied head-ref file.
+if(NOT HEAD_HASH)
+	file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024)
+	string(STRIP "${HEAD_HASH}" HEAD_HASH)
+endif()
diff --git a/redist/cmake-modules-c99fd3/LICENSE_1_0.txt b/redist/cmake-modules-c99fd3/LICENSE_1_0.txt
new file mode 100644
index 0000000..36b7cd9
--- /dev/null
+++ b/redist/cmake-modules-c99fd3/LICENSE_1_0.txt
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/redist/cmake-modules-c99fd3/README.markdown b/redist/cmake-modules-c99fd3/README.markdown
new file mode 100644
index 0000000..81ec9b2
--- /dev/null
+++ b/redist/cmake-modules-c99fd3/README.markdown
@@ -0,0 +1,107 @@
+Ryan's CMake Modules
+====================
+
+Ryan A. Pavlik, Ph.D.
+
+<ryan.pavlik at gmail.com> <abiryan at ryand.net>
+<http://academic.cleardefinition.com>
+
+Introduction
+------------
+
+This is a collection of CMake modules that I've produced during the course
+of a variety of software development.  There are a number of find modules,
+especially for virtual reality and physical simulation packages, some utility
+modules of more general interest, and some patches or workarounds for
+CMake itself.
+
+Each module is generally documented, and depending on how busy I was
+when I created it, the documentation can be fairly complete.
+
+By now, it also includes contributions both from open-source projects I work on,
+as well as friendly strangers on the Internet contributing their modules. I am
+very grateful for improvements/fixes/pull requests!
+
+How to Integrate
+----------------
+
+These modules are probably best placed wholesale into a `cmake` subdirectory
+of your project source.
+
+If you use Git, try installing [git-subtree][1] (included by default on
+Git for Windows and perhaps for your Linux distro, especially post-1.9.1), so
+you can easily use this repository for subtree merges, updating simply.
+
+For the initial checkout:
+
+	cd projectdir
+
+	git subtree add --squash --prefix=cmake https://github.com/rpavlik/cmake-modules.git master
+
+For updates:
+
+	cd projectdir
+
+	git subtree pull --squash --prefix=cmake https://github.com/rpavlik/cmake-modules.git master
+
+If you originally installed this by just copying the files, you'll sadly have
+to delete the directory, commit that, then do the `git subtree add`. Annoying,
+but I don't know a workaround.
+
+If you use some other version control, you can export a copy of this directory
+without the git metadata by calling:
+
+    ./export-to-directory.sh yourprojectdir/cmake
+
+You might also consider exporting to a temp directory and merging changes, since
+this will not overwrite by default.  You can pass -f to overwrite existing files.
+
+How to Use
+----------
+
+At the minimum, all you have to do is add a line like this near the top
+of your root CMakeLists.txt file (but not before your `project()` call):
+
+	list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+You might also want the extra automatic features/fixes included with the
+modules, for that, just add another line following the first one:
+
+	include(UseBackportedModules)
+
+Look at `module-help.html`/`.txt` (generated by `update-help.sh` on a unix-like shell with a pre-3.0 version of CMake.)
+either in this directory or online at <http://github.com/rpavlik/cmake-modules/blob/master/module-help.txt>
+for more information on individual modules. Since it requires an older CMake for generation,
+the docs might get out of date, sorry - but you can always look at the files themselves.
+
+
+Licenses
+--------
+
+The modules that I wrote myself are all subject to this license:
+
+> Copyright Iowa State University 2009-2014,
+> or Copyright Sensics, Inc. 2014-2015,
+> or Copyright Ryan A. Pavlik 2009-2015
+>
+> Distributed under the Boost Software License, Version 1.0.
+>
+> (See accompanying file `LICENSE_1_0.txt` or copy at
+> <http://www.boost.org/LICENSE_1_0.txt>)
+
+Modules based on those included with CMake are under the OSI-approved
+BSD license, which is included in each of those modules.  A few other modules
+are modified from other sources - when in doubt, look at the `.cmake`.
+
+If you'd like to contribute, that would be great! Just make sure to include
+the license boilerplate in your module, and send a pull request.
+
+Important License Note!
+-----------------------
+
+If you find this file inside of another project, rather than at the top-level
+directory, you're in a separate project that is making use of these modules.
+That separate project can (and probably does) have its own license specifics.
+
+
+[1]: http://github.com/apenwarr/git-subtree  "Git Subtree master"
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..ce7ccf9
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,41 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## project src cmake
+##
+################################################################################
+
+
+add_subdirectory (cmake/preInstall)
+
+# force unit tests at compile time, and set them to be more verbose than
+# cmake's default "test" target:
+#
+# The target is part of the default build (ALL) and declares no outputs, so
+# the ctest command below runs on every build. --output-on-failure makes
+# ctest print the output of failing tests.
+set(THIS_UNITTESTS "${THIS_PROJECT_NAME}_unittests")
+set(utest_cmd ${CMAKE_CTEST_COMMAND} --force-new-ctest-process --output-on-failure)
+add_custom_target(${THIS_UNITTESTS}
+    ALL
+    COMMAND ${utest_cmd}
+    )
+
+add_subdirectory (c++)
+add_subdirectory (python)
+add_subdirectory (demo)
+add_subdirectory (cmake/postInstall)
diff --git a/src/c++/CMakeLists.txt b/src/c++/CMakeLists.txt
new file mode 100644
index 0000000..94f189a
--- /dev/null
+++ b/src/c++/CMakeLists.txt
@@ -0,0 +1,106 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the c++ subfolder
+##
+## author Come Raczy
+##
+################################################################################
+
+# RPATH handling: binaries in the build tree get a build RPATH (it is neither
+# skipped nor replaced by the install RPATH); on install, directories of
+# linked libraries outside the project are appended to the install RPATH.
+set(CMAKE_SKIP_BUILD_RPATH  FALSE)
+set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+set (THIS_CXX_COMMMON_CMAKE "${THIS_MODULE_DIR}/cxxCommon.cmake")
+set (THIS_CXX_EXECUTABLE_CMAKE "${THIS_MODULE_DIR}/cxxExecutable.cmake")
+set (THIS_CXX_LIBRARY_CMAKE "${THIS_MODULE_DIR}/cxxLibrary.cmake")
+set (THIS_CXX_CONFIGURE_CMAKE "${THIS_MODULE_DIR}/cxxConfigure.cmake")
+set (THIS_CXX_TEST_LIBRARY_CMAKE "${THIS_MODULE_DIR}/cxxTestLibrary.cmake")
+
+include ("${THIS_CXX_CONFIGURE_CMAKE}")
+
+##
+## refresh git label at build-time
+##
+add_dependencies(${THIS_OPT} ${CXX_BUILDTIME_CONFIG_TARGET})
+
+##
+## run cppcheck if any cxx source has been updated:
+##
+# Only active when a python interpreter was found and DEVELOPER_MODE
+# evaluates to true.
+if (PYTHONINTERP_FOUND)
+    if(${DEVELOPER_MODE})
+        # The source list is globbed when cmake runs, so files added later
+        # are only picked up after cmake is re-run.
+        file(GLOB_RECURSE ALL_CXX_FILES *.cpp *.cc *.hh)
+
+        # Re-run the analyzer script whenever any globbed source changes.
+        # NOTE(review): nothing in this command visibly creates
+        # cppcheck.done; presumably run_cppcheck.py writes it in the
+        # working directory -- confirm, otherwise the check reruns on
+        # every build.
+        add_custom_command(
+            OUTPUT cppcheck.done
+            DEPENDS ${ALL_CXX_FILES}
+            COMMAND ${PYTHON_EXECUTABLE} ${THIS_SOURCE_QC_DIR}/run_cppcheck.py ${CMAKE_CURRENT_SOURCE_DIR}
+            COMMENT "Running c++ static analyzer")
+
+        # Run the check as part of the default build (ALL) and make the
+        # ${THIS_OPT} target depend on it.
+        set(THIS_CPPCHECK "${THIS_PROJECT_NAME}_cppcheck")
+        add_custom_target(${THIS_CPPCHECK} ALL DEPENDS cppcheck.done)
+
+        add_dependencies(${THIS_OPT} ${THIS_CPPCHECK})
+    endif()
+endif ()
+
+##
+## Build all the libraries for the project
+##
+add_subdirectory (lib)
+
+
+##
+## build all the applications for the project
+##
+if (NOT WIN32)
+    add_subdirectory (bin)
+endif ()
+
+##
+## build the documentation when available
+##
+# Locate doxygen; the "doc" target below is only defined when it is found.
+include  (FindDoxygen)
+message (STATUS "Doxygen: ${DOXYGEN_EXECUTABLE}. Dot: ${DOXYGEN_DOT_EXECUTABLE}.")
+if (DOXYGEN_FOUND)
+    # DOXYFILE_SRC:  template in the source tree
+    # DOXYFILE_SRC2: intermediate template written at configure time
+    # DOXYFILE:      final config file written at build time
+    set (DOXYFILE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
+    set (DOXYFILE_SRC2 ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile.in)
+    set (DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
+    message (STATUS "Creating Doxygen config file: ${DOXYFILE}")
+
+    # configure doxyfile twice, once at config time and once at build time:
+    # The template refers to BUILDTIME_WORKFLOW_VERSION; setting it to a
+    # WORKFLOW_VERSION placeholder here presumably leaves that placeholder in
+    # the intermediate file for the build-time pass to fill in.
+    # NOTE(review): confirm the at-sign reference in the set() below is not
+    # already expanded by cmake under the project's policy settings.
+    set (BUILDTIME_WORKFLOW_VERSION "@WORKFLOW_VERSION@")
+    configure_file(${DOXYFILE_SRC} ${DOXYFILE_SRC2} @ONLY)
+
+    # set doxyfile to update from build-time configuration info:
+    # NOTE(review): this depends on ${THIS_BUILDTIME_CONFIG_TARGET}, while the
+    # add_dependencies call earlier in this file uses
+    # ${CXX_BUILDTIME_CONFIG_TARGET} -- confirm both variables are defined.
+    set (THIS_BUILDTIME_DOXYGEN_TARGET "${THIS_PROJECT_NAME}_doxygen_buildtime_config")
+    add_custom_target(${THIS_BUILDTIME_DOXYGEN_TARGET}
+        DEPENDS ${THIS_BUILDTIME_CONFIG_TARGET}
+        COMMAND ${CMAKE_COMMAND}
+            -D CONFIG_FILE=${THIS_BUILDTIME_CONFIG_FILE}
+            -D SOURCE_FILE=${DOXYFILE_SRC2}
+            -D DEST_FILE=${DOXYFILE}
+            -P ${THIS_MODULE_DIR}/buildTimeConfigure.cmake)
+    # "doc" is not part of the default build; it regenerates the Doxyfile
+    # via the target above and then runs doxygen on it.
+    add_custom_target(doc
+        DEPENDS ${THIS_BUILDTIME_DOXYGEN_TARGET}
+        COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE})
+endif ()
+
diff --git a/src/c++/Doxyfile.in b/src/c++/Doxyfile.in
new file mode 100644
index 0000000..6162648
--- /dev/null
+++ b/src/c++/Doxyfile.in
@@ -0,0 +1,1522 @@
+# Doxyfile 1.6.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = @THIS_PROJECT_NAME@ 
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         = @BUILDTIME_WORKFLOW_VERSION@
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = @CMAKE_BINARY_DIR@/src/c++/doxygen
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before file names in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        = @CMAKE_SOURCE_DIR@/c++/ @CMAKE_BINARY_DIR@/c++/
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 4
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it parses.
+# With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this tag.
+# The format is ext=language, where ext is a file extension, and language is one of
+# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
+# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = YES
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = YES 
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES   = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = YES
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
+# doxygen. The layout file controls the global structure of the generated output files
+# in an output format independent way. To create the layout file that represents
+# doxygen's defaults, run doxygen with the -l option. You can optionally specify a
+# file name after the option, if omitted DoxygenLayout.xml will be used as the name
+# of the layout file.
+
+LAYOUT_FILE            =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = @CMAKE_SOURCE_DIR@/src/c++
+
+# should we bother with the config.h? -- if so it is here:
+#INPUT                  = @CMAKE_BINARY_DIR@/src/c++/lib/common
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# If the HTML_TIMESTAMP tag is set to YES then the generated HTML
+# documentation will contain the timestamp.
+
+HTML_TIMESTAMP         = NO
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
+# are set, an additional index file will be generated that can be used as input for
+# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
+# HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          =
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
+# For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP)
+# there is already a search function so this one should typically
+# be disabled.
+
+SEARCHENGINE           = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = YES
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME           = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct group dependencies.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = YES
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/src/c++/README.cxx.coordinates.txt b/src/c++/README.cxx.coordinates.txt
new file mode 100644
index 0000000..4930641
--- /dev/null
+++ b/src/c++/README.cxx.coordinates.txt
@@ -0,0 +1,8 @@
+
+C++ genomic coordinate convention:
+
+All internal position numbers are zero indexed.
+
+All internal intervals are zero-indexed and end after the last position in the interval (BED convention)
+Example:
+start_pos: 0, end_pos: 100 represents the 1-indexed closed interval [1,100] (i.e. zero-indexed positions 0 through 99)
diff --git a/src/c++/bin/CMakeLists.txt b/src/c++/bin/CMakeLists.txt
new file mode 100644
index 0000000..4cdc2d7
--- /dev/null
+++ b/src/c++/bin/CMakeLists.txt
@@ -0,0 +1,44 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the c++/bin subdirectory
+##
+## author Come Raczy
+##
+################################################################################
+
+include(${THIS_CXX_EXECUTABLE_CMAKE})
+
+file (GLOB THIS_PROGRAM_SOURCE_LIST [a-zA-Z0-9]*.cpp)
+
+##
+## Generic rule for all the other programs
+##
+foreach(THIS_PROGRAM_SOURCE ${THIS_PROGRAM_SOURCE_LIST})
+    get_filename_component(THIS_PROGRAM ${THIS_PROGRAM_SOURCE} NAME_WE)
+    set(THIS_APPLICATION_LIB ${THIS_PROJECT_NAME}_${THIS_PROGRAM})
+    add_executable        (${THIS_PROGRAM} ${THIS_PROGRAM_SOURCE})
+    target_link_libraries (${THIS_PROGRAM}  ${THIS_APPLICATION_LIB} ${THIS_AVAILABLE_LIBRARIES}
+                           ${HTSLIB_LIBRARY} ${Boost_LIBRARIES}
+                           ${THIS_ADDITIONAL_LIB})
+    install(TARGETS ${THIS_PROGRAM} RUNTIME DESTINATION ${THIS_LIBEXECDIR})
+endforeach()
+
diff --git a/src/c++/bin/CheckSVLoci.cpp b/src/c++/bin/CheckSVLoci.cpp
new file mode 100644
index 0000000..aa8f33a
--- /dev/null
+++ b/src/c++/bin/CheckSVLoci.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/CheckSVLoci/CheckSVLoci.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return CheckSVLoci().run(argc,argv);
+}
diff --git a/src/c++/bin/DumpSVLoci.cpp b/src/c++/bin/DumpSVLoci.cpp
new file mode 100644
index 0000000..3497cd7
--- /dev/null
+++ b/src/c++/bin/DumpSVLoci.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/DumpSVLoci/DumpSVLoci.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return DumpSVLoci().run(argc,argv);
+}
diff --git a/src/c++/bin/EstimateSVLoci.cpp b/src/c++/bin/EstimateSVLoci.cpp
new file mode 100644
index 0000000..cd225fd
--- /dev/null
+++ b/src/c++/bin/EstimateSVLoci.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/EstimateSVLoci/EstimateSVLoci.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return EstimateSVLoci().run(argc,argv);
+}
diff --git a/src/c++/bin/GenerateSVCandidates.cpp b/src/c++/bin/GenerateSVCandidates.cpp
new file mode 100644
index 0000000..30dbf06
--- /dev/null
+++ b/src/c++/bin/GenerateSVCandidates.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/GenerateSVCandidates/GenerateSVCandidates.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return GenerateSVCandidates().run(argc,argv);
+}
diff --git a/src/c++/bin/GetAlignmentStats.cpp b/src/c++/bin/GetAlignmentStats.cpp
new file mode 100644
index 0000000..58fbd27
--- /dev/null
+++ b/src/c++/bin/GetAlignmentStats.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/GetAlignmentStats/GetAlignmentStats.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return GetAlignmentStats().run(argc,argv);
+}
diff --git a/src/c++/bin/GetChromDepth.cpp b/src/c++/bin/GetChromDepth.cpp
new file mode 100644
index 0000000..74a47ca
--- /dev/null
+++ b/src/c++/bin/GetChromDepth.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/GetChromDepth/GetChromDepth.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return GetChromDepth().run(argc,argv);
+}
diff --git a/src/c++/bin/MergeAlignmentStats.cpp b/src/c++/bin/MergeAlignmentStats.cpp
new file mode 100644
index 0000000..0a8b233
--- /dev/null
+++ b/src/c++/bin/MergeAlignmentStats.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/MergeAlignmentStats/MergeAlignmentStats.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return MergeAlignmentStats().run(argc,argv);
+}
diff --git a/src/c++/bin/MergeEdgeStats.cpp b/src/c++/bin/MergeEdgeStats.cpp
new file mode 100644
index 0000000..2345a24
--- /dev/null
+++ b/src/c++/bin/MergeEdgeStats.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/MergeEdgeStats/MergeEdgeStats.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return MergeEdgeStats().run(argc,argv);
+}
diff --git a/src/c++/bin/MergeSVLoci.cpp b/src/c++/bin/MergeSVLoci.cpp
new file mode 100644
index 0000000..7643604
--- /dev/null
+++ b/src/c++/bin/MergeSVLoci.cpp
@@ -0,0 +1,29 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/MergeSVLoci/MergeSVLoci.hh"
+
+
+int
+main(int argc, char* argv[])
+{
+    return MergeSVLoci().run(argc,argv);
+}
+
diff --git a/src/c++/bin/SummarizeAlignmentStats.cpp b/src/c++/bin/SummarizeAlignmentStats.cpp
new file mode 100644
index 0000000..af5a917
--- /dev/null
+++ b/src/c++/bin/SummarizeAlignmentStats.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/SummarizeAlignmentStats/SummarizeAlignmentStats.hh"
+
+/// Program entry point: runs a default-constructed SummarizeAlignmentStats on the command line and returns its result
+int
+main(int argc, char* argv[])
+{
+    return SummarizeAlignmentStats().run(argc,argv);
+}
diff --git a/src/c++/bin/SummarizeSVLoci.cpp b/src/c++/bin/SummarizeSVLoci.cpp
new file mode 100644
index 0000000..e8f2d23
--- /dev/null
+++ b/src/c++/bin/SummarizeSVLoci.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/SummarizeSVLoci/SummarizeSVLoci.hh"
+
+/// Program entry point: runs a default-constructed SummarizeSVLoci on the command line and returns its result
+int
+main(int argc, char* argv[])
+{
+    return SummarizeSVLoci().run(argc,argv);
+}
diff --git a/src/c++/bin/TestAssembler.cpp b/src/c++/bin/TestAssembler.cpp
new file mode 100644
index 0000000..e4cf312
--- /dev/null
+++ b/src/c++/bin/TestAssembler.cpp
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "applications/TestAssembler/TestAssembler.hh"
+
+/// Program entry point: runs a default-constructed TestAssembler on the command line and returns its result
+int
+main(int argc, char* argv[])
+{
+    return TestAssembler().run(argc,argv);
+}
diff --git a/src/c++/lib/CMakeLists.txt b/src/c++/lib/CMakeLists.txt
new file mode 100644
index 0000000..e84caee
--- /dev/null
+++ b/src/c++/lib/CMakeLists.txt
@@ -0,0 +1,53 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the lib subfolder
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+##
+## List of libraries
+##
+## note that order approximates dependency chain, where libraries are listed
+## after libraries upon which they depend:
+set (THIS_ALL_LIBRARIES blt_util common htsapi appstats options assembly alignment format svgraph manta)
+
+##
+## Build all the libraries for the project
+## THIS_AVAILABLE_LIBRARIES is incrementally updated, one entry per library directory
+##
+set (THIS_AVAILABLE_LIBRARIES "")
+foreach (THIS_LIB_DIR ${THIS_ALL_LIBRARIES})
+    add_subdirectory(${THIS_LIB_DIR})
+    # prepend "<project>_<lib>", so the finished list is in the reverse of the THIS_ALL_LIBRARIES order
+    set(THIS_AVAILABLE_LIBRARIES ${THIS_PROJECT_NAME}_${THIS_LIB_DIR} ${THIS_AVAILABLE_LIBRARIES})
+endforeach ()
+# make the finished list visible in the parent scope
+set (THIS_AVAILABLE_LIBRARIES ${THIS_AVAILABLE_LIBRARIES} PARENT_SCOPE)
+
+
+#
+# handle applications separately
+#
+add_subdirectory(applications)
+
diff --git a/src/c++/lib/README.txt b/src/c++/lib/README.txt
new file mode 100644
index 0000000..b77c3ea
--- /dev/null
+++ b/src/c++/lib/README.txt
@@ -0,0 +1,35 @@
+alignment:
+sequence alignment utilities
+
+applications/X:
+code specific to command-line application X
+
+appstats:
+performance tracking code shared between applications
+
+blt_util:
+general utility functions from manta/starling/strelka/gvcftools
+
+common:
+general utility functions from CASAVA/Grouper/Isaac
+
+format:
+conversion of data into external formats
+
+htsapi:
+various c++ wrapper objects built on top of samtools/htslib
+and other utilities for standard genomic indexed file formats
+like bam/cram, bed, vcf, etc.
+
+manta:
+code common to the manta project (i.e. too specific for the general utility
+libraries, but not fitting the category of any other library). This is also
+the default library to which logic should be added until there is a clear
+pattern to justify a new library.
+
+options:
+command-line options objects which are shared between applications
+
+svgraph:
+SV locus graph components
+
diff --git a/src/c++/lib/alignment/AlignerBase.hh b/src/c++/lib/alignment/AlignerBase.hh
new file mode 100644
index 0000000..d5568f1
--- /dev/null
+++ b/src/c++/lib/alignment/AlignerBase.hh
@@ -0,0 +1,119 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "alignment/Alignment.hh"
+#include "alignment/AlignmentScores.hh"
+#include "blt_util/align_path.hh"
+
+
+// #define DEBUG_ALN // Standard debug output
+// #define DEBUG_ALN_MATRIX // Dump full edit-matrix tables to stderr. Does not scale to non-trivial ref/query size!
+
+
+#ifdef DEBUG_ALN_MATRIX
+#include <iosfwd>
+#endif
+
+
+/// shared methods for all aligners
+/// (the object stores its own copy of the AlignmentScores passed at construction)
+template <typename ScoreType>
+struct AlignerBase
+{
+    AlignerBase(
+        const AlignmentScores<ScoreType>& scores) :
+        _scores(scores)
+    {}
+
+    /// read-only access to the aligner's scores:
+    const AlignmentScores<ScoreType>&
+    getScores() const
+    {
+        return _scores;
+    }
+
+    /// recover a path alignment score without aligning, requires SEQ_MATCH style CIGAR
+    /// \param isScoreOffEdge if true, SOFT_CLIP segments are charged the offEdge score per base
+    ScoreType
+    getPathScore(
+        const ALIGNPATH::path_t& apath,
+        const bool isScoreOffEdge = true) const;
+
+    /// recover the maximum partial path alignment score (going left->right) without aligning, requires SEQ_MATCH style CIGAR
+    /// \param[out] maxReadOffset,maxRefOffset read/reference offsets at which the returned maximum is reached
+    ScoreType
+    getMaxPathScore(
+        const ALIGNPATH::path_t& apath,
+        unsigned& maxReadOffset,
+        unsigned& maxRefOffset,
+        const bool isScoreOffEdge = true) const;
+
+protected:
+    /// set max to the largest of v0,v1,v2 and return its index (0,1 or 2); on ties the lowest index wins
+    static
+    uint8_t
+    max3(
+        ScoreType& max,
+        const ScoreType v0,
+        const ScoreType v1,
+        const ScoreType v2)
+    {
+        max=v0;
+        uint8_t ptr=0;
+        if (v1>v0)
+        {
+            max=v1;
+            ptr=1;
+        }
+        if (v2>max)
+        {
+            max=v2;
+            ptr=2;
+        }
+        return ptr;
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    /// write out a subset of the score and back-trace pointer matrices for debugging,
+    /// for one reference
+    template <typename SymIter, typename MatrixType, typename ScoreValType>
+    void
+    dumpSingleRefTable(
+        const SymIter refBegin, const SymIter refEnd,
+        const size_t querySize,
+        const MatrixType& ptrMatrix,
+        const std::vector<std::vector<ScoreValType>>& storeScores,
+        const char refSym,
+        const AlignState::index_t sIndex,
+        unsigned& storeIndex,
+        std::ostream& os) const;
+#endif
+    /// scoring parameters, held by value and immutable after construction
+    const AlignmentScores<ScoreType> _scores;
+};
+
+
+#include "alignment/AlignerBaseImpl.hh"
diff --git a/src/c++/lib/alignment/AlignerBaseImpl.hh b/src/c++/lib/alignment/AlignerBaseImpl.hh
new file mode 100644
index 0000000..b6e3a27
--- /dev/null
+++ b/src/c++/lib/alignment/AlignerBaseImpl.hh
@@ -0,0 +1,215 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include <cassert>
+
+#ifdef DEBUG_ALN_MATRIX
+#include <boost/io/ios_state.hpp>
+
+#include <iomanip>
+#include <iostream>
+#endif
+
+
+#ifdef DEBUG_ALN_MATRIX
+template <typename ScoreType>
+template <typename SymIter, typename MatrixType, typename ScoreValType>
+void
+AlignerBase<ScoreType>::
+dumpSingleRefTable(
+    const SymIter refBegin, const SymIter refEnd,
+    const size_t querySize,
+    const MatrixType& ptrMatrix,
+    const std::vector<std::vector<ScoreValType>>& storeScores,
+    const char refSym,
+    const AlignState::index_t sIndex,
+    unsigned& storeIndex,
+    std::ostream& os) const
+{
+    boost::io::ios_all_saver guard(os);
+    // print one table cell: 3-wide score (" XX" for any score below -900) followed by the back-trace symbol
+    auto printVal = [](
+                        const ScoreType& val,
+                        const char fromSym,
+                        std::ostream& pos)
+    {
+        if (val<-900)
+        {
+            pos << " XX";
+        }
+        else
+        {
+            pos << std::setfill(' ') << std::setw(3) << val;
+        }
+        pos << fromSym;
+    };
+    // print cells for query indices [0,querySize] from storeScores[storeIndex], with pointers from matrix column qrefIndex
+    auto printQueryRow = [&](
+                             const unsigned qrefIndex)
+    {
+        for (unsigned queryIndex(0); queryIndex <= querySize; ++queryIndex)
+        {
+            const auto& val(storeScores[storeIndex][queryIndex].getScore(sIndex));
+            const char fromSym(AlignState::symbol(ptrMatrix.val(queryIndex, qrefIndex).getStatePtr(sIndex)));
+            printVal(val, fromSym, os);
+        }
+        os << "\n";
+    };
+    // first a row for reference index 0, then one row per reference symbol; storeIndex (caller-visible) advances once per symbol
+    os << "# - ";
+    printQueryRow(0);
+    unsigned refIndex(0);
+    for (SymIter refIter(refBegin); refIter != refEnd; ++refIter, ++refIndex)
+    {
+        os << refSym << " " << *refIter << " ";
+        storeIndex++;
+        printQueryRow(refIndex+1);
+    }
+}
+#endif
+
+
+/// sum the per-segment scores over apath; every INSERT/DELETE segment is charged open + extend*length
+template <typename ScoreType>
+ScoreType
+AlignerBase<ScoreType>::
+getPathScore(
+    const ALIGNPATH::path_t& apath,
+    const bool isScoreOffEdge) const
+{
+    using namespace ALIGNPATH;
+
+    ScoreType val(0);
+
+    // note that the intent of this function was to replicate the underlying aligner and thus
+    // not penalize insert-delete transitions, however as written the open score is added twice for
+    // such an event. This turns out to perform much better for the performance of this tool as
+    // a variant 'arm' validator. so the unintended code is staying in place for now.
+    //
+    /// TODO: reevaluate policy for insertion-deletion state transition score
+
+    for (const path_segment& ps : apath)
+    {
+        bool isIndel(false); // placement of isIndel inside of this loop is the 'bug'
+        switch (ps.type)
+        {
+        case MATCH:
+            assert(false && "Unexpected MATCH segment"); // if MATCH segments exist, then you're using the wrong type of CIGAR for this function
+            break;
+        case SEQ_MATCH:
+            val += (_scores.match * ps.length);
+            isIndel = false;
+            break;
+        case SEQ_MISMATCH:
+            val += (_scores.mismatch * ps.length);
+            isIndel = false;
+            break;
+        case INSERT:
+        case DELETE:
+            if (! isIndel) val += _scores.open; // always taken: isIndel is reset at the top of every iteration
+            val += (_scores.extend * ps.length);
+            isIndel = true;
+            break;
+        case SOFT_CLIP:
+            if (isScoreOffEdge) val += (_scores.offEdge * ps.length);
+            isIndel = false;
+            break;
+        default: // all other segment types leave the score unchanged
+            break;
+        }
+    }
+    return val;
+}
+
+
+/// return the highest running score reached at the end of any segment of apath (never below 0), with its read/ref offsets
+template <typename ScoreType>
+ScoreType
+AlignerBase<ScoreType>::
+getMaxPathScore(
+    const ALIGNPATH::path_t& apath,
+    unsigned& maxReadOffset,
+    unsigned& maxRefOffset,
+    const bool isScoreOffEdge) const
+{
+    using namespace ALIGNPATH;
+
+    ScoreType val(0);
+    unsigned readOffset(0);
+    unsigned refOffset(0);
+    // the maximum starts at score 0 / offsets 0, which is returned if no segment end scores higher
+    ScoreType maxVal(0);
+    maxReadOffset=0;
+    maxRefOffset=0;
+
+    for (const path_segment& ps : apath)
+    {
+        bool isIndel(false); // unintended 'bug' with positive results, see TODO note above
+        switch (ps.type)
+        {
+        case MATCH:
+            assert(false && "Unexpected MATCH segment"); // if MATCH segments exist, then you're using the wrong type of CIGAR for this function
+            break;
+        case SEQ_MATCH:
+            val += (_scores.match * ps.length);
+            readOffset += ps.length;
+            refOffset += ps.length;
+            isIndel = false;
+            break;
+        case SEQ_MISMATCH:
+            val += (_scores.mismatch * ps.length);
+            readOffset += ps.length;
+            refOffset += ps.length;
+            isIndel = false;
+            break;
+        case INSERT:
+            if (! isIndel) val += _scores.open;
+            val += (_scores.extend * ps.length);
+            readOffset += ps.length; // insertions consume read bases only
+            isIndel = true;
+            break;
+        case DELETE:
+            if (! isIndel) val += _scores.open;
+            val += (_scores.extend * ps.length);
+            refOffset += ps.length; // deletions consume reference bases only
+            isIndel = true;
+            break;
+        case SOFT_CLIP:
+            if (isScoreOffEdge) val += (_scores.offEdge * ps.length);
+            readOffset += ps.length;
+            isIndel = false;
+            break;
+        default: // all other segment types change neither the score nor the offsets
+            break;
+        }
+        // the maximum is only evaluated at segment boundaries; ties keep the earlier position
+        if (val>maxVal)
+        {
+            maxVal = val;
+            maxReadOffset = readOffset;
+            maxRefOffset = refOffset;
+        }
+    }
+    return maxVal;
+}
diff --git a/src/c++/lib/alignment/AlignerUtil.hh b/src/c++/lib/alignment/AlignerUtil.hh
new file mode 100644
index 0000000..596c897
--- /dev/null
+++ b/src/c++/lib/alignment/AlignerUtil.hh
@@ -0,0 +1,87 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+/// \brief align a contig across two breakend regions
+///
+
+
+#pragma once
+
+#include "Alignment.hh"
+#include "blt_util/align_path.hh"
+
+/// path-building helper: updatePath switches ps to segment type atype, first appending any finished segment to path
+struct AlignerUtil
+{
+    static void updatePath(
+        ALIGNPATH::path_t& path,
+        ALIGNPATH::path_segment& ps,
+        ALIGNPATH::align_t atype)
+    {
+        if (ps.type != atype)
+        {
+            if (ALIGNPATH::NONE != ps.type) path.push_back(ps);
+            ps.length = 0;
+            ps.type = atype;
+        }
+    }
+};
+
+
+
+/// bookkeeping variables used during alignment backtrace
+template <typename ScoreType>
+struct BackTrace
+{
+    ScoreType max = 0; ///< best score recorded so far (meaningful once isInit is true)
+    AlignState::index_t state = AlignState::MATCH; ///< alignment state recorded together with max
+    unsigned queryBegin = 0; ///< query index recorded together with max
+    unsigned refBegin = 0; ///< reference index recorded together with max
+    bool isInit = false; ///< false until a first value has been recorded
+};
+
+
+
+/// record thisMax with its position and state in btrace when it is the first or a strictly higher score:
+template <typename ScoreType>
+void
+updateBacktrace(
+    const ScoreType thisMax,
+    const unsigned refIndex,
+    const unsigned queryIndex,
+    BackTrace<ScoreType>& btrace,
+    const AlignState::index_t state = AlignState::MATCH)
+{
+    // keep the existing record unless it is unset or beaten
+    if (btrace.isInit && (! (thisMax>btrace.max))) return;
+
+    btrace.isInit = true;
+    btrace.max = thisMax;
+    btrace.state = state;
+    btrace.queryBegin = queryIndex;
+    btrace.refBegin = refIndex;
+}
+
+
+
+
diff --git a/src/c++/lib/alignment/Alignment.cpp b/src/c++/lib/alignment/Alignment.cpp
new file mode 100644
index 0000000..2263c54
--- /dev/null
+++ b/src/c++/lib/alignment/Alignment.cpp
@@ -0,0 +1,33 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "alignment/Alignment.hh"
+
+#include <iostream>
+
+/// print an alignment as "start: <beginPos> cigar: <cigar string of apath>"
+std::ostream&
+operator<<(std::ostream& os, const Alignment& align)
+{
+    os << "start: " << align.beginPos;
+    return (os << " cigar: " << apath_to_cigar(align.apath));
+}
+
+
diff --git a/src/c++/lib/alignment/Alignment.hh b/src/c++/lib/alignment/Alignment.hh
new file mode 100644
index 0000000..4cdf569
--- /dev/null
+++ b/src/c++/lib/alignment/Alignment.hh
@@ -0,0 +1,112 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+
+#include <iosfwd>
+
+/// minimal summary of a query sequence aligned to a reference, roughly
+/// following bam conventions for describing the alignment (apath trivially
+/// maps to CIGAR string segments)
+struct Alignment
+{
+    void
+    clear() // reset to the unaligned state: begin position 0 and an empty path
+    {
+        beginPos = 0;
+        apath.clear();
+    }
+    /// an alignment counts as aligned as soon as its path holds at least one segment
+    bool
+    isAligned()
+    const
+    {
+        return (! apath.empty());
+    }
+
+    pos_t beginPos; ///< begin position of the alignment; no default initializer, so unset until assigned or clear() is called
+    ALIGNPATH::path_t apath; ///< alignment path segments
+};
+
+std::ostream&
+operator<<(std::ostream& os, const Alignment& align);
+
+
+/// enumeration of aligner states, with helpers returning a printable label or symbol for each state
+struct AlignState
+{
+    // note the order of this enumerator is important for bit packing in client code, in particular
+    // we rely on fitting the [MATCH->JUMP] states in 2 bits for the standard jump aligner
+    enum index_t
+    {
+        MATCH,
+        DELETE,
+        INSERT,
+        JUMP, // allows for an arbitrarily large hop between two reference regions
+        SPLICE,
+        JUMPINS = SPLICE, // analogous to jump state, but for very large insertions, reuse SPLICE state, in current applications we don't need both states in the same model.
+        SIZE
+    };
+    /// human-readable name of state i; SPLICE and JUMPINS share one label, values without a case (e.g. SIZE) give "UNKNOWN"
+    static
+    const char*
+    label(const index_t i)
+    {
+        switch (i)
+        {
+        case MATCH:
+            return "MATCH";
+        case DELETE:
+            return "DELETE";
+        case INSERT:
+            return "INSERT";
+        case JUMP:
+            return "JUMP";
+        case SPLICE:
+            return "SPLICE/JUMPINS";
+        default:
+            return "UNKNOWN";
+        }
+    }
+    /// single-character code of state i; SPLICE and JUMPINS share 'N', values without a case (e.g. SIZE) give '?'
+    static
+    char
+    symbol(const index_t i)
+    {
+        switch (i)
+        {
+        case MATCH:
+            return 'M';
+        case DELETE:
+            return 'D';
+        case INSERT:
+            return 'I';
+        case JUMP:
+            return 'J';
+        case SPLICE:
+            return 'N';
+        default:
+            return '?';
+        }
+    }
+
+};
diff --git a/src/c++/lib/alignment/AlignmentScores.hh b/src/c++/lib/alignment/AlignmentScores.hh
new file mode 100644
index 0000000..364a8fd
--- /dev/null
+++ b/src/c++/lib/alignment/AlignmentScores.hh
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+/// immutable set of alignment scoring parameters: every member is const and is set once by the constructor
+template <typename ScoreType>
+struct AlignmentScores
+{
+    AlignmentScores(
+        ScoreType initMatch,
+        ScoreType initMismatch,
+        ScoreType initOpen,
+        ScoreType initExtend,
+        ScoreType initOffEdge,
+        bool initIsAllowEdgeInsertion = false) :
+        match(initMatch),
+        mismatch(initMismatch),
+        open(initOpen),
+        extend(initExtend),
+        offEdge(initOffEdge),
+        isAllowEdgeInsertion(initIsAllowEdgeInsertion)
+    {}
+    // note: the constructor stores the values as given, the sign conventions below are not checked here
+    const ScoreType match; ///< match score
+    const ScoreType mismatch; ///< mismatch score (should be negative)
+    const ScoreType open; ///< gap open, gap of length N is scored (open + N * extend) (should be negative)
+    const ScoreType extend; ///< gap extend, gap of length N is scored (open + N * extend) (should be negative or zero)
+    const ScoreType offEdge; ///< score applied when query goes off the end of an edge (should be negative)
+    const bool isAllowEdgeInsertion; ///< are insertions allowed directly on the leading and trailing edges?
+};
diff --git a/src/c++/lib/alignment/AlignmentUtil.cpp b/src/c++/lib/alignment/AlignmentUtil.cpp
new file mode 100644
index 0000000..3925337
--- /dev/null
+++ b/src/c++/lib/alignment/AlignmentUtil.cpp
@@ -0,0 +1,199 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#include "alignment/AlignmentUtil.hh"
+#include "blt_util/seq_util.hh"
+
+#include <cassert>
+
+//#define DEBUG_RS
+
+
+
+#ifdef DEBUG_RS
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+
+
+/// true if the alignment path is non-empty and its first segment is an align-match segment
+static
+bool
+hasAlignedPrefix(const Alignment& al)
+{
+    const auto& segments(al.apath);
+    return ((! segments.empty()) && is_segment_align_match(segments[0].type));
+}
+
+
+/// true if the alignment path is non-empty and its last segment is an align-match segment
+static
+bool
+hasAlignedSuffix(const Alignment& al)
+{
+    const auto& segments(al.apath);
+    return ((! segments.empty()) && is_segment_align_match(segments.back().type));
+}
+
+
+
+// check a jump alignment for consistency (only one end aligning)
+// FIXME: not used, need to think what makes an alignment consistent
+// (how about : total number of matches shouldn't exceed sequence length?)
+//bool
+//isConsistentAlignment(const JumpAlignmentResult<int>& res, const unsigned /*minAlignContext = 0*/)
+//{
+//    // not consistent if both unaligned
+//    if (! (res.align1.isAligned() && res.align2.isAligned()) ) return false;
+//
+//    return true;
+//}
+
+
+/// split querySeq into its align1, jump-insert and align2 read segments, then swap/reverse-complement them per the flags
+void
+getFwdStrandQuerySegments(
+    const JumpAlignmentResult<int>& align,
+    const std::string& querySeq,
+    const bool isBp2AlignedFirst,
+    const bool isBp1Reversed,
+    const bool isBp2Reversed,
+    std::string& bp1Seq,
+    std::string& bp2Seq,
+    std::string& insertSeq)
+{
+    const unsigned align1Size(apath_read_length(align.align1.apath));
+    const unsigned insertSize(align.jumpInsertSize);
+    const unsigned insertOffset(align1Size + insertSize);
+    const unsigned align2Size(apath_read_length(align.align2.apath));
+    const unsigned align2Offset(insertOffset + align2Size);
+    // the three segments must tile the whole query sequence
+    assert(querySeq.size() == align2Offset);
+
+    bp1Seq = querySeq.substr(0,align1Size);
+    insertSeq = querySeq.substr(align1Size,insertSize);
+    bp2Seq = querySeq.substr(insertOffset,align2Size);
+    // the segments were cut in alignment order; exchange them when breakend 2 was aligned first
+    if (isBp2AlignedFirst)
+    {
+        std::swap(bp1Seq,bp2Seq);
+    }
+    // the reversal flags apply after the swap; the insert sequence follows the orientation of breakend 1
+    if (isBp1Reversed)
+    {
+        reverseCompStr(bp1Seq);
+        reverseCompStr(insertSeq);
+    }
+
+    if (isBp2Reversed)
+    {
+        reverseCompStr(bp2Seq);
+    }
+}
+
+
+/// set extendedContig to: ref1Seq before the start of align1 + querySeq + ref2Seq after the end of align2
+void
+getExtendedContig(
+    const JumpAlignmentResult<int>& align,
+    const std::string& querySeq,
+    const std::string& ref1Seq,
+    const std::string& ref2Seq,
+    std::string& extendedContig)
+{
+    // leading flank: ref1Seq up to the start of the first alignment
+    std::string contig(ref1Seq.substr(0, align.align1.beginPos));
+    // trailing flank: ref2Seq from the end of the second alignment onwards
+    const unsigned ref2TailBegin(align.align2.beginPos + apath_ref_length(align.align2.apath));
+    const std::string ref2Tail(ref2Seq.substr(ref2TailBegin, (ref2Seq.size()-ref2TailBegin)));
+    contig += querySeq;
+    extendedContig = contig + ref2Tail;
+}
+
+
+
+/// set extendedContig to: refSeq before the alignment start + querySeq + refSeq after the alignment end
+void
+getExtendedContig(
+    const AlignmentResult<int>& alignment,
+    const std::string& querySeq,
+    const std::string& refSeq,
+    std::string& extendedContig)
+{
+    // leading flank: refSeq up to the start of the alignment
+    std::string contig(refSeq.substr(0, alignment.align.beginPos));
+    // trailing flank: refSeq from the end of the alignment onwards
+    const unsigned refTailBegin(alignment.align.beginPos + apath_ref_length(alignment.align.apath));
+    const std::string refTail(refSeq.substr(refTailBegin, (refSeq.size()-refTailBegin)));
+    contig += querySeq;
+    extendedContig = contig + refTail;
+}
+
+
+/// copy the jump-insert part of querySeq (jumpInsertSize bases following align1's read span) into insertSeq
+void
+getFwdStrandInsertSegment(
+    const JumpAlignmentResult<int>& align,
+    const std::string& querySeq,
+    const bool isBp1Reversed,
+    std::string& insertSeq)
+{
+    // the insertion starts where the read span of the first alignment ends
+    const unsigned insertBegin(apath_read_length(align.align1.apath));
+    const unsigned insertLength(align.jumpInsertSize);
+    insertSeq = querySeq.substr(insertBegin, insertLength);
+    if (! isBp1Reversed) return;
+    reverseCompStr(insertSeq); // breakend 1 is reversed: reverse-complement the insertion as well
+}
+
+
+/// return refOffset + alignment begin (aligned prefix), else refOffset + alignment end (aligned suffix), else -1
+int
+estimateBreakPointPos(const Alignment& al, const unsigned refOffset)
+{
+    // -1 means no breakpoint found
+    int breakPointPosEstimate(-1);
+
+    const bool isPrefix = hasAlignedPrefix(al);
+    const bool isSuffix = hasAlignedSuffix(al);
+
+    if (! (isPrefix || isSuffix) )
+    {
+        return breakPointPosEstimate;
+    }
+
+    if (isPrefix)
+    {
+        breakPointPosEstimate = refOffset + al.beginPos /*+ isPrefix*/;
+    }
+    else
+    {
+        breakPointPosEstimate = refOffset + alignEnd(al) /*- isSuffix*/;
+    }
+
+    // position 0 is a valid estimate (e.g. refOffset == 0 and al.beginPos == 0), only -1 is reserved for "not found"
+    assert(breakPointPosEstimate>=0);
+    return breakPointPosEstimate;
+}
diff --git a/src/c++/lib/alignment/AlignmentUtil.hh b/src/c++/lib/alignment/AlignmentUtil.hh
new file mode 100644
index 0000000..ae0ad0b
--- /dev/null
+++ b/src/c++/lib/alignment/AlignmentUtil.hh
@@ -0,0 +1,138 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include "alignment/Alignment.hh"
+#include "alignment/GlobalJumpAligner.hh"
+#include "alignment/GlobalAligner.hh"
+
+
+/// reference position at which the alignment ends: begin position plus the reference length of its path
+inline pos_t
+alignEnd(const Alignment& align)
+{
+    const auto refSpan(ALIGNPATH::apath_ref_length(align.apath));
+    return (align.beginPos + refSpan);
+}
+
+
+/// begin offset of the alignment; when isReversed is set the offset is measured from the far end of a reference of length refSize
+///
+inline
+pos_t
+getAlignBeginOffset(
+    const Alignment& align,
+    const unsigned refSize,
+    const bool isReversed)
+{
+    // forward orientation: the stored begin position is returned unchanged
+    if (! isReversed)
+    {
+        return align.beginPos;
+    }
+
+    // reversed orientation: the alignment end, counted back from refSize, becomes the begin
+    return (refSize - alignEnd(align));
+}
+
+
+/// end offset of the alignment; when isReversed is set the offset is measured from the far end of a reference of length refSize
+///
+inline
+pos_t
+getAlignEndOffset(
+    const Alignment& align,
+    const unsigned refSize,
+    const bool isReversed)
+{
+    // forward orientation: the end is simply alignEnd()
+    if (! isReversed)
+    {
+        return alignEnd(align);
+    }
+
+    // reversed orientation: the alignment begin, counted back from refSize, becomes the end
+    return (refSize - align.beginPos);
+}
+
+
+
+/// check a jump alignment for consistency (only one end aligning)
+/// FIXME: not used, need to think what makes an alignment consistent
+/// (how about : total number of matches shouldn't exceed sequence length?)
+//bool
+//isConsistentAlignment(const JumpAlignmentResult<int>& res, const unsigned minAlignContext = 0);
+
+
+/// extend the contig by padding the flanking regions of the aligned reference regions on each end
+void
+getExtendedContig(
+    const JumpAlignmentResult<int>& align,
+    const std::string& querySeq,
+    const std::string& ref1Seq,
+    const std::string& ref2Seq,
+    std::string& extendedContig);
+
+
+/// extend the somatic contig by padding the flanking regions of the aligned reference regions on each end
+void
+getExtendedContig(
+    const AlignmentResult<int>& alignment,
+    const std::string& querySeq,
+    const std::string& refSeq,
+    std::string& extendedContig);
+
+/// given a jump alignment and query sequence, return the bp1,insert and bp2 query sequences
+///
+/// the insert sequence is converted to fwd-strand by assuming it is "attached" to bp1
+/// note this is targeted for debug-code only
+///
+void
+getFwdStrandQuerySegments(
+    const JumpAlignmentResult<int>& align,
+    const std::string& querySeq,
+    const bool isBp2AlignedFirst,
+    const bool isBp1Reversed,
+    const bool isBp2Reversed,
+    std::string& bp1Seq,
+    std::string& bp2Seq,
+    std::string& insertSeq);
+
+
+/// given a jump alignment and query sequence, return the insert sequence
+///
+/// the insert sequence is converted to fwd-strand by assuming it is "attached" to bp1
+///
+void
+getFwdStrandInsertSegment(
+    const JumpAlignmentResult<int>& align,
+    const std::string& querySeq,
+    const bool isBp1Reversed,
+    std::string& insertSeq);
+
+
+/// TODO: document this if it serves a general purpose, or make private to AssembleSVBreakend
+int
+estimateBreakPointPos(const Alignment& al, const unsigned refOffset);
diff --git a/src/c++/lib/alignment/CMakeLists.txt b/src/c++/lib/alignment/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/alignment/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/alignment/GlobalAligner.hh b/src/c++/lib/alignment/GlobalAligner.hh
new file mode 100644
index 0000000..520d829
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalAligner.hh
@@ -0,0 +1,124 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// derived from ELAND implementation by Tony Cox
+
+#pragma once
+
+#include "SingleRefAlignerShared.hh"
+
+
+/// \brief Implementation of global alignment with affine gap costs
+///
+/// The alignment outputs the start position and a CIGAR-style alignment
+/// expression of query to reference. Alignment of the query is global,
+/// except that the query can "fall-off" either end of the reference:
+/// in this case, each unaligned position is given an "off-edge" score and
+/// the base is soft-clipped in the alignment
+///
+/// transition from insert to delete is free and allowed, but not the reverse
+///
+template <typename ScoreType>
+struct GlobalAligner : public SingleRefAlignerBase<ScoreType>
+{
+    GlobalAligner(
+        const AlignmentScores<ScoreType>& scores) :
+        SingleRefAlignerBase<ScoreType>(scores)
+    {}
+
+    /// align [queryBegin,queryEnd) to [refBegin,refEnd); result is cleared first and then receives the alignment
+    template <typename SymIter>
+    void
+    align(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter refBegin, const SymIter refEnd,
+        AlignmentResult<ScoreType>& result) const;
+
+private:
+
+    // scores of one matrix cell, one per alignment state; insert and delete are for query wrt reference
+    struct ScoreVal
+    {
+        ScoreType
+        getScore(const AlignState::index_t i) const
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::INSERT:
+                return ins;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0; // only reached when asserts are compiled out
+            }
+        }
+
+        ScoreType match;
+        ScoreType del;
+        ScoreType ins;
+    };
+
+    struct PtrVal
+    {
+        typedef uint8_t code_t;
+
+        /// for state i, return the state recorded in this cell as the predecessor of i,
+        /// i.e. the state the backtrace continues in (presumably the highest scoring one)
+        AlignState::index_t
+        getStatePtr(const AlignState::index_t i) const
+        {
+            return static_cast<AlignState::index_t>(getStateCode(i));
+        }
+    private:
+        code_t
+        getStateCode(const AlignState::index_t i) const
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::INSERT:
+                return ins;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0; // only reached when asserts are compiled out
+            }
+        }
+
+    public:
+        code_t match : 2; // 2-bit predecessor code stored for the MATCH state
+        code_t del : 2; // 2-bit predecessor code stored for the DELETE state
+        code_t ins : 2; // 2-bit predecessor code stored for the INSERT state
+    };
+
+    // scratch storage kept as mutable members so that repeated (const) align() calls reuse the allocations:
+    typedef std::vector<ScoreVal> ScoreVec;
+    mutable ScoreVec _score1; // _score1/_score2: the two score columns align() swaps between
+    mutable ScoreVec _score2;
+    mutable basic_matrix<PtrVal> _ptrMat; // backtrace pointers, resized by align() to (querySize+1) x (refSize+1)
+};
+
+
+#include "alignment/GlobalAlignerImpl.hh"
diff --git a/src/c++/lib/alignment/GlobalAlignerImpl.hh b/src/c++/lib/alignment/GlobalAlignerImpl.hh
new file mode 100644
index 0000000..97d6991
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalAlignerImpl.hh
@@ -0,0 +1,210 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// derived from ELAND implementation by Tony Cox
+
+
+#include <cassert>
+
+#ifdef DEBUG_ALN
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+
+
+template <typename ScoreType>
+template <typename SymIter>
+void
+GlobalAligner<ScoreType>::
+align(
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter refBegin, const SymIter refEnd,
+    AlignmentResult<ScoreType>& result) const
+{
+    result.clear(); // start from an empty result
+
+    const AlignmentScores<ScoreType>& scores(this->getScores());
+
+    const size_t querySize(std::distance(queryBegin, queryEnd));
+    const size_t refSize(std::distance(refBegin, refEnd));
+
+    assert(0 != querySize); // an empty query or reference is not supported
+    assert(0 != refSize);
+
+    _score1.resize(querySize+1); // entry 0 means "no query base consumed yet", entry i+1 belongs to query base i
+    _score2.resize(querySize+1);
+    _ptrMat.resize(querySize+1, refSize+1);
+
+    ScoreVec* thisSV(&_score1); // scores of the reference column currently being filled
+    ScoreVec* prevSV(&_score2); // scores of the preceding reference column
+
+    static const ScoreType badVal(-10000); // score assigned to disallowed states
+
+    // global alignment of query: initialize the column preceding the first reference base
+    //
+    // start from the delete state is disallowed, start from the insert state is controlled by scores.isAllowEdgeInsertion
+    //
+    // query can 'fall-off' the end of a short reference, in which case it will
+    // be soft-clipped and each base off the end will be scored as offEdge
+    //
+    for (unsigned queryIndex(0); queryIndex<=querySize; queryIndex++)
+    {
+        PtrVal& headPtr(_ptrMat.val(queryIndex,0));
+        ScoreVal& val((*thisSV)[queryIndex]);
+        headPtr.match = AlignState::MATCH;
+        val.match = queryIndex * scores.offEdge; // the first queryIndex query bases lie off the reference edge
+        headPtr.del = AlignState::MATCH;
+        val.del = badVal;
+        if (not scores.isAllowEdgeInsertion)
+        {
+            headPtr.ins = AlignState::MATCH;
+            val.ins = badVal;
+        }
+        else
+        {
+            headPtr.ins = AlignState::INSERT;
+            val.ins = scores.open + (queryIndex * scores.extend);
+        }
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    // store full matrix of scores to print out later, don't turn this debug option on for large references!
+    std::vector<ScoreVec> storeScores;
+
+    storeScores.push_back(*thisSV);
+#endif
+
+    BackTrace<ScoreType> btrace; // backtrace start candidate; presumably updateBacktrace() keeps the best-scoring one
+
+    {
+        unsigned refIndex(0);
+        for (SymIter refIter(refBegin); refIter != refEnd; ++refIter, ++refIndex)
+        {
+            std::swap(thisSV,prevSV); // the column filled last becomes the "previous" column
+
+            {
+                // query entry 0 of this column: free start in match (score 0), delete and insert are disallowed
+                PtrVal& headPtr(_ptrMat.val(0,refIndex+1));
+                ScoreVal& val((*thisSV)[0]);
+                headPtr.match = AlignState::MATCH;
+                val.match = 0;
+                headPtr.del = AlignState::MATCH;
+                val.del = badVal;
+                headPtr.ins = AlignState::MATCH;
+                val.ins = badVal;
+            }
+
+            unsigned queryIndex(0);
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                // update match: predecessor is the diagonal cell (previous reference column, previous query entry)
+                ScoreVal& headScore((*thisSV)[queryIndex+1]);
+                PtrVal& headPtr(_ptrMat.val(queryIndex+1,refIndex+1));
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex]);
+                    headPtr.match = this->max3(
+                                        headScore.match,
+                                        sval.match,
+                                        sval.del,
+                                        sval.ins);
+
+                    headScore.match += ((*queryIter==*refIter) ? scores.match : scores.mismatch);
+                }
+
+                // update delete: predecessor is the same query entry in the previous reference column; only match pays gap-open
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.del = this->max3(
+                                      headScore.del,
+                                      sval.match + scores.open,
+                                      sval.del,
+                                      sval.ins);
+
+                    headScore.del += scores.extend;
+                    if (0==queryIndex) headScore.del = badVal; // delete state is disallowed at the first query base
+                }
+
+                // update insert: predecessor is the previous query entry in this reference column; delete->insert is blocked (badVal)
+                {
+                    const ScoreVal& sval((*thisSV)[queryIndex]);
+                    headPtr.ins = this->max3(
+                                      headScore.ins,
+                                      sval.match + scores.open,
+                                      badVal,
+                                      sval.ins);
+
+                    headScore.ins += scores.extend;
+                    if (0==queryIndex) headScore.ins = badVal; // insert state is disallowed at the first query base
+                }
+
+#ifdef DEBUG_ALN
+                log_os << "i1i2: " << queryIndex+1 << " " << refIndex+1 << "\n";
+                log_os << headScore.match << ":" << headScore.del << ":" << headScore.ins << "/"
+                       << static_cast<int>(headPtr.match) << static_cast<int>(headPtr.del) << static_cast<int>(headPtr.ins) << "\n";
+#endif
+            }
+#ifdef DEBUG_ALN
+            log_os << "\n";
+#endif
+
+#ifdef DEBUG_ALN_MATRIX
+            storeScores.push_back(*thisSV);
+#endif
+
+            // backtrace start candidate: the whole query is consumed and ends in the match state at this reference position
+            {
+                const ScoreVal& sval((*thisSV)[querySize]);
+                updateBacktrace(sval.match,refIndex+1,querySize,btrace);
+            }
+        }
+    }
+
+    // optionally allow for trailing insertion: the query ends in the insert state at the last reference position
+    if (scores.isAllowEdgeInsertion)
+    {
+        const ScoreVal& sval((*thisSV)[querySize]);
+        updateBacktrace(sval.ins,refSize,querySize,btrace, AlignState::INSERT);
+    }
+
+    // also allow for the case where query falls-off the end of the reference: each remaining query base is scored as offEdge
+    for (unsigned queryIndex(0); queryIndex<querySize; queryIndex++)
+    {
+        const ScoreVal& sval((*thisSV)[queryIndex]);
+        const ScoreType thisMax(sval.match + (querySize-queryIndex) * scores.offEdge);
+        updateBacktrace(thisMax,refSize,queryIndex,btrace);
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    std::vector<AlignState::index_t> dumpStates {AlignState::MATCH, AlignState::DELETE, AlignState::INSERT};
+    this->dumpTables(queryBegin, queryEnd,
+                     refBegin, refEnd,
+                     querySize, _ptrMat,
+                     dumpStates, storeScores);
+#endif
+
+    this->backTraceAlignment(
+        queryBegin, queryEnd,
+        refBegin, refEnd,
+        querySize, refSize,
+        _ptrMat,
+        btrace, result);
+}
+
diff --git a/src/c++/lib/alignment/GlobalJumpAligner.hh b/src/c++/lib/alignment/GlobalJumpAligner.hh
new file mode 100644
index 0000000..49f4caa
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalJumpAligner.hh
@@ -0,0 +1,140 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "JumpAlignerBase.hh"
+
+
+
+/// \brief a method to align a contig to two references
+///
+/// the alignment can make a single jump from reference1 to reference2
+///
+/// transition from insert to delete is free and allowed, but not the reverse
+/// transition from/to jump to insert is free and allowed TODO: more restrictive
+///
+template <typename ScoreType>
+struct GlobalJumpAligner : public JumpAlignerBase<ScoreType>
+{
+    GlobalJumpAligner(
+        const AlignmentScores<ScoreType>& scores,
+        const ScoreType jumpScore) :
+        JumpAlignerBase<ScoreType>(scores,jumpScore)
+    {
+        // edge insertions are an unsupported option for this aligner:
+        assert (not scores.isAllowEdgeInsertion);
+    }
+
+    /// align [queryBegin,queryEnd) to the two references; result is cleared first and then receives the alignment
+    template <typename SymIter>
+    void
+    align(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter ref1Begin, const SymIter ref1End,
+        const SymIter ref2Begin, const SymIter ref2End,
+        JumpAlignmentResult<ScoreType>& result) const;
+
+private:
+
+    // scores of one matrix cell, one per alignment state; insert and delete are for seq1 wrt seq2 (presumably query wrt reference)
+    struct ScoreVal
+    {
+        ScoreType
+        getScore(const AlignState::index_t i) const
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::INSERT:
+                return ins;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::JUMP:
+                return jump;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0; // only reached when asserts are compiled out
+            }
+        }
+
+        ScoreType match;
+        ScoreType ins;
+        ScoreType del;
+        ScoreType jump;
+    };
+
+    struct PtrVal
+    {
+        typedef uint8_t code_t;
+
+        /// for state i, return the state recorded in this cell as the predecessor of i,
+        /// i.e. the state the backtrace continues in (presumably the highest scoring one)
+        AlignState::index_t
+        getStatePtr(const AlignState::index_t i) const
+        {
+            return static_cast<AlignState::index_t>(getStateCode(i));
+        }
+
+    private:
+        code_t
+        getStateCode(const AlignState::index_t i) const
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::INSERT:
+                return ins;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::JUMP:
+                return jump;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0; // only reached when asserts are compiled out
+            }
+        }
+
+    public:
+        // pack 4x2 bits into 1 byte (one 2-bit predecessor code per state):
+        code_t match : 2;
+        code_t ins : 2;
+        code_t del : 2;
+        code_t jump : 2;
+    };
+
+    // scratch storage kept as mutable members so that repeated (const) align() calls reuse the allocations:
+    typedef std::vector<ScoreVal> ScoreVec;
+    mutable ScoreVec _score1; // _score1/_score2: the two score columns align() swaps between
+    mutable ScoreVec _score2;
+
+    typedef basic_matrix<PtrVal> PtrMat;
+    mutable PtrMat _ptrMat1; // backtrace pointers for reference 1, resized by align() to (querySize+1) x (ref1Size+1)
+    mutable PtrMat _ptrMat2; // backtrace pointers for reference 2, resized by align() to (querySize+1) x (ref2Size+1)
+};
+
+
+#include "alignment/GlobalJumpAlignerImpl.hh"
diff --git a/src/c++/lib/alignment/GlobalJumpAlignerImpl.hh b/src/c++/lib/alignment/GlobalJumpAlignerImpl.hh
new file mode 100644
index 0000000..5ed774c
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalJumpAlignerImpl.hh
@@ -0,0 +1,340 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+//
+// \author Chris Saunders
+//
+
+#include <cassert>
+
+#ifdef DEBUG_ALN
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+
+
+template <typename ScoreType>
+template <typename SymIter>
+void
+GlobalJumpAligner<ScoreType>::
+align(
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter ref1Begin, const SymIter ref1End,
+    const SymIter ref2Begin, const SymIter ref2End,
+    JumpAlignmentResult<ScoreType>& result) const
+{
+    result.clear(); // start from an empty result
+
+    const AlignmentScores<ScoreType>& scores(this->getScores());
+
+    const size_t querySize(std::distance(queryBegin, queryEnd));
+    const size_t ref1Size(std::distance(ref1Begin, ref1End));
+    const size_t ref2Size(std::distance(ref2Begin, ref2End));
+
+    assert(0 != querySize); // an empty query or reference is not supported
+    assert(0 != ref1Size);
+    assert(0 != ref2Size);
+
+    _score1.resize(querySize+1); // entry 0 means "no query base consumed yet", entry i+1 belongs to query base i
+    _score2.resize(querySize+1);
+    _ptrMat1.resize(querySize+1, ref1Size+1);
+    _ptrMat2.resize(querySize+1, ref2Size+1);
+
+    ScoreVec* thisSV(&_score1); // scores of the reference column currently being filled
+    ScoreVec* prevSV(&_score2); // scores of the preceding reference column
+
+    static const ScoreType badVal(-10000); // score assigned to disallowed states
+
+    // global alignment of query: initialize the column preceding the first base of each reference
+    //
+    // start from the insert, delete or jump state is disallowed
+    //
+    // query can 'fall-off' the end of a short reference, in which case it will
+    // be soft-clipped and each base off the end will be scored as offEdge
+    //
+    for (unsigned queryIndex(0); queryIndex<=querySize; queryIndex++)
+    {
+        PtrVal& headPtr1(_ptrMat1.val(queryIndex,0));
+        PtrVal& headPtr2(_ptrMat2.val(queryIndex,0));
+        ScoreVal& val((*thisSV)[queryIndex]);
+        headPtr1.match = AlignState::MATCH;
+        headPtr2.match = AlignState::MATCH;
+        val.match = queryIndex * scores.offEdge; // the first queryIndex query bases lie off the reference edge
+        headPtr1.del = AlignState::MATCH;
+        headPtr2.del = AlignState::MATCH;
+        val.del = badVal;
+        headPtr1.ins = AlignState::MATCH;
+        headPtr2.ins = AlignState::MATCH;
+        val.ins = badVal;
+        headPtr1.jump = AlignState::MATCH;
+        headPtr2.jump = AlignState::MATCH;
+        val.jump = badVal;
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    // store full matrix of scores to print out later, don't turn this debug option on for large references!
+    std::vector<ScoreVec> storeScores;
+
+    storeScores.push_back(*thisSV);
+#endif
+
+
+    BackTrace<ScoreType> btrace; // backtrace start candidate; presumably updateBacktrace() keeps the best-scoring one
+
+    {
+        unsigned ref1Index(0);
+        for (SymIter ref1Iter(ref1Begin); ref1Iter != ref1End; ++ref1Iter, ++ref1Index)
+        {
+            std::swap(thisSV,prevSV); // the column filled last becomes the "previous" column
+
+            {
+                // query entry 0 of this column: free start in match (score 0); insert, delete and jump are disallowed:
+                PtrVal& headPtr(_ptrMat1.val(0,ref1Index+1));
+                ScoreVal& val((*thisSV)[0]);
+                headPtr.match = AlignState::MATCH;
+                val.match = 0;
+                headPtr.del = AlignState::MATCH;
+                val.del = badVal;
+                headPtr.ins = AlignState::MATCH;
+                val.ins = badVal;
+                headPtr.jump = AlignState::MATCH;
+                val.jump = badVal;
+            }
+
+            unsigned queryIndex(0);
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                // update match: predecessor is the diagonal cell (previous reference column, previous query entry)
+                ScoreVal& headScore((*thisSV)[queryIndex+1]);
+                PtrVal& headPtr(_ptrMat1.val(queryIndex+1,ref1Index+1));
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex]);
+                    headPtr.match = this->max3(
+                                        headScore.match,
+                                        sval.match,
+                                        sval.del,
+                                        sval.ins);
+
+                    headScore.match += ((*queryIter==*ref1Iter) ? scores.match : scores.mismatch);
+                }
+
+                // update delete: predecessor is the same query entry in the previous reference column; only match pays gap-open
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.del = this->max3(
+                                      headScore.del,
+                                      sval.match + scores.open,
+                                      sval.del,
+                                      sval.ins);
+
+                    headScore.del += scores.extend;
+                    if (0==queryIndex) headScore.del = badVal; // delete state is disallowed at the first query base
+                }
+
+                // update insert: predecessor is the previous query entry in this reference column; delete->insert is blocked (badVal)
+                {
+                    const ScoreVal& sval((*thisSV)[queryIndex]);
+                    headPtr.ins = this->max3(
+                                      headScore.ins,
+                                      sval.match + scores.open,
+                                      badVal,
+                                      sval.ins);
+
+                    headScore.ins += scores.extend;
+                    if (0==queryIndex) headScore.ins = badVal; // insert state is disallowed at the first query base
+                }
+
+                // update jump: entered from this cell's match or insert score plus the jump score, or carried over from the previous column
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.jump = this->max4(
+                                       headScore.jump,
+                                       headScore.match + this->getJumpScore(),
+                                       badVal,
+                                       headScore.ins + this->getJumpScore(),
+                                       sval.jump);
+                }
+
+#ifdef DEBUG_ALN
+                log_os << "queryIdx refIdx ref1Idx: " << queryIndex+1 << " " << ref1Index+1 << " " << ref1Index+1 << "\n";
+                log_os << "MIDJ: " << headScore.match << ":" << headScore.ins << ":" << headScore.del << ":" << headScore.jump << "/"
+                       << static_cast<int>(headPtr.match) << static_cast<int>(headPtr.ins) << static_cast<int>(headPtr.del) << static_cast<int>(headPtr.jump) << "\n";
+                log_os << "QuerySymbol:" << *queryIter << " RefSymbol:" << *ref1Iter << "\n";
+#endif
+            }
+#ifdef DEBUG_ALN
+            log_os << "\n";
+#endif
+
+#ifdef DEBUG_ALN_MATRIX
+            storeScores.push_back(*thisSV);
+#endif
+
+            // backtrace start candidate: the whole query is consumed and ends in the match state at this ref1 position
+            {
+                const ScoreVal& sval((*thisSV)[querySize]);
+                updateBacktrace(sval.match, ref1Index+1, querySize, btrace);
+            }
+        }
+    }
+
+    // in the backtrace start search, also allow for the case where the query falls-off the end of reference 1:
+    for (unsigned queryIndex(0); queryIndex<querySize; queryIndex++)
+    {
+        const ScoreVal& sval((*thisSV)[queryIndex]);
+        const ScoreType thisMax(sval.match + (querySize-queryIndex) * scores.offEdge);
+        updateBacktrace(thisMax, ref1Size, queryIndex, btrace);
+    }
+
+
+    {
+        for (unsigned queryIndex(0); queryIndex<=querySize; queryIndex++) // reset the column before the first ref2 base, keeping only the jump scores
+        {
+            ScoreVal& val((*thisSV)[queryIndex]);
+            val.match = queryIndex * scores.offEdge;
+            val.del = badVal;
+            val.ins = badVal;
+            //val.jump = badVal; // preserve jump setting from last iteration of ref1
+        }
+
+#ifdef DEBUG_ALN_MATRIX
+        storeScores.push_back(*thisSV);
+#endif
+
+        unsigned ref2Index(0);
+        for (SymIter ref2Iter(ref2Begin); ref2Iter != ref2End; ++ref2Iter, ++ref2Index)
+        {
+            std::swap(thisSV,prevSV); // the column filled last becomes the "previous" column
+
+            {
+                // query entry 0 of this column: free start in match (score 0); insert, delete and jump are disallowed:
+                PtrVal& headPtr(_ptrMat2.val(0,ref2Index+1));
+                ScoreVal& val((*thisSV)[0]);
+                headPtr.match = AlignState::MATCH;
+                val.match = 0;
+                headPtr.del = AlignState::MATCH;
+                val.del = badVal;
+                headPtr.ins = AlignState::MATCH;
+                val.ins = badVal;
+                headPtr.jump = AlignState::MATCH;
+                val.jump = badVal;
+            }
+
+            unsigned queryIndex(0);
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                // update match: as for ref1, but the diagonal cell's jump score is an additional candidate
+                ScoreVal& headScore((*thisSV)[queryIndex+1]);
+                PtrVal& headPtr(_ptrMat2.val(queryIndex+1,ref2Index+1));
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex]);
+                    headPtr.match = this->max4(
+                                        headScore.match,
+                                        sval.match,
+                                        sval.del,
+                                        sval.ins,
+                                        sval.jump);
+
+                    headScore.match += ((*queryIter==*ref2Iter) ? scores.match : scores.mismatch);
+                }
+
+                // update delete: same candidates as for ref1, but without the first-query-base reset to badVal
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.del = this->max3(
+                                      headScore.del,
+                                      sval.match + scores.open,
+                                      sval.del,
+                                      sval.ins);
+
+                    headScore.del += scores.extend;
+                }
+
+                // update insert: as for ref1, plus the jump score of the previous query entry as a candidate
+                {
+                    const ScoreVal& sval((*thisSV)[queryIndex]);
+                    headPtr.ins = this->max4(
+                                      headScore.ins,
+                                      sval.match + scores.open,
+                                      badVal,
+                                      sval.ins,
+                                      sval.jump); // jump->ins moves get a pass on the gap-open penalty, to support breakend insertions
+
+                    headScore.ins += scores.extend;
+                }
+
+                // update jump: no new jump can start in ref2, the score is only carried over from the previous column
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.jump = AlignState::JUMP;
+                    headScore.jump = sval.jump;
+                }
+
+#ifdef DEBUG_ALN
+                log_os << "queryIdx refIdx ref2Idx: " << queryIndex+1 << " " << ref1Size+ref2Index+1 << " " << ref2Index+1 << "\n";
+                log_os << "MIDJ: " << headScore.match << ":" << headScore.ins << ":" << headScore.del << ":" << headScore.jump << "/"
+                       << static_cast<int>(headPtr.match) << static_cast<int>(headPtr.ins) << static_cast<int>(headPtr.del) << static_cast<int>(headPtr.jump) << "\n";
+                log_os << "QuerySymbol:" << *queryIter << " RefSymbol:" << *ref2Iter << "\n";
+#endif
+            }
+#ifdef DEBUG_ALN
+            log_os << "\n";
+#endif
+
+#ifdef DEBUG_ALN_MATRIX
+            storeScores.push_back(*thisSV);
+#endif
+
+            // backtrace start candidate in ref2; ref2 positions are passed offset by ref1Size
+            {
+                const ScoreVal& sval((*thisSV)[querySize]);
+                updateBacktrace(sval.match, ref1Size+ref2Index+1, querySize, btrace);
+            }
+        }
+
+    }
+
+    // in the backtrace start search, also allow for the case where the query falls-off the end of reference 2:
+    for (unsigned queryIndex(0); queryIndex<querySize; queryIndex++)
+    {
+        const ScoreVal& sval((*thisSV)[queryIndex]);
+        const ScoreType thisMax(sval.match + (querySize-queryIndex) * scores.offEdge);
+        updateBacktrace(thisMax, ref1Size+ref2Size, queryIndex, btrace);
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    std::vector<AlignState::index_t> dumpStates {AlignState::MATCH, AlignState::DELETE, AlignState::INSERT, AlignState::JUMP};
+    this->dumpTables(queryBegin, queryEnd,
+                     ref1Begin, ref1End,
+                     ref2Begin, ref2End,
+                     querySize,
+                     _ptrMat1, _ptrMat2,
+                     dumpStates,storeScores);
+#endif
+
+    this->backTraceAlignment(
+        queryBegin, queryEnd,
+        ref1Begin, ref1End,
+        ref2Begin, ref2End,
+        querySize, ref1Size, ref2Size,
+        _ptrMat1, _ptrMat2, btrace, result);
+}
+
diff --git a/src/c++/lib/alignment/GlobalJumpIntronAligner.hh b/src/c++/lib/alignment/GlobalJumpIntronAligner.hh
new file mode 100644
index 0000000..573ca92
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalJumpIntronAligner.hh
@@ -0,0 +1,150 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Felix Schlesinger
+///
+
+#pragma once
+
+#include "JumpAlignerBase.hh"
+
+
+/// \brief a method to align a contig to two references, with an additional intron (SPLICE) state
+///
+/// the alignment can make a single jump from reference1 to reference2
+/// match->intron and intron->match transitions are only allowed at the splice motifs tested in align()
+template <typename ScoreType>
+struct GlobalJumpIntronAligner : public JumpAlignerBase<ScoreType>
+{
+    GlobalJumpIntronAligner(
+        const AlignmentScores<ScoreType>& scores,
+        const ScoreType jumpScore,
+        const ScoreType intronOpenScore,
+        const ScoreType intronOffEdgeScore) :
+        JumpAlignerBase<ScoreType>(scores,jumpScore),
+        _intronOpenScore(intronOpenScore),
+        _intronOffEdgeScore(intronOffEdgeScore)
+    {
+        // edge insertion is an unsupported option for this aligner (only checked when asserts are enabled):
+        assert (not scores.isAllowEdgeInsertion);
+    }
+
+    /// aligns query to ref1/ref2 and writes the alignment path of query to reference into \p result
+    template <typename SymIter>
+    void
+    align(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter ref1Begin, const SymIter ref1End,
+        const SymIter ref2Begin, const SymIter ref2End,
+        bool ref1Fw, bool ref2Fw, bool isStranded, // strand flags forwarded to the splice motif tests for ref1/ref2
+        JumpAlignmentResult<ScoreType>& result) const;
+
+private:
+
+    // scores of all alignment states for one matrix cell; insert and delete are for seq1 wrt seq2
+    struct ScoreVal
+    {
+        ScoreType
+        getScore(const AlignState::index_t i) const // returns the score stored for state i
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::INSERT:
+                return ins;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::JUMP:
+                return jump;
+            case AlignState::SPLICE:
+                return intron;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0;
+            }
+        }
+
+        ScoreType match;
+        ScoreType ins;
+        ScoreType del;
+        ScoreType jump;
+        ScoreType intron;
+    };
+
+    struct PtrVal
+    {
+        typedef uint16_t code_t;
+
+        /// for state i, return the previous-state code stored for this cell
+        /// (align() records the highest scoring predecessor here) for use during the backtrace
+        AlignState::index_t
+        getStatePtr(const AlignState::index_t i) const
+        {
+            return static_cast<AlignState::index_t>(getStateCode(i));
+        }
+
+    private:
+        code_t
+        getStateCode(const AlignState::index_t i) const // raw bit-field value stored for state i
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::INSERT:
+                return ins;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::JUMP:
+                return jump;
+            case AlignState::SPLICE:
+                return intron;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0;
+            }
+        }
+
+    public:
+        /// pack five 3-bit state codes (15 bits in total) into a single uint16_t:
+        code_t match : 3;
+        code_t ins : 3;
+        code_t del : 3;
+        code_t jump : 3;
+        code_t intron : 3;
+    };
+
+    const ScoreType _intronOpenScore; ///< gap open for introns (i.e. deletions starting with splice motif) (should be negative)
+    const ScoreType _intronOffEdgeScore; ///< As offEdge but only if the last aligned bases match a splice motif (should be negative) todo: the motif condition is not implemented
+
+    // add the matrices here to reduce allocations over many alignment calls (mutable because the const align() resizes and writes them):
+    typedef std::vector<ScoreVal> ScoreVec;
+    mutable ScoreVec _score1;
+    mutable ScoreVec _score2;
+
+    typedef basic_matrix<PtrVal> PtrMat;
+    mutable PtrMat _ptrMat1;
+    mutable PtrMat _ptrMat2;
+};
+
+
+#include "alignment/GlobalJumpIntronAlignerImpl.hh"
diff --git a/src/c++/lib/alignment/GlobalJumpIntronAlignerImpl.hh b/src/c++/lib/alignment/GlobalJumpIntronAlignerImpl.hh
new file mode 100644
index 0000000..0171953
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalJumpIntronAlignerImpl.hh
@@ -0,0 +1,450 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+//
+// \author Chris Saunders and Felix Schlesinger
+//
+
+#include "AlignerUtil.hh"
+
+#include <cassert>
+
+#ifdef DEBUG_ALN
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+/// \brief test whether the two reference bases immediately before \p refIter form a splice acceptor motif
+/// ("AG" is accepted when fwStrand or !isStranded, "AC" when !fwStrand or !isStranded)
+template <typename SymIter>
+bool
+isUpstreamSpliceAcceptor(
+    const SymIter refBegin,
+    SymIter refIter,
+    bool fwStrand,
+    bool isStranded)
+{
+    if (std::distance(refBegin,refIter)<2) return false; // fewer than two bases precede refIter, so no motif can be read
+    if (((fwStrand) || (!isStranded)) && (*(refIter - 2) == 'A' && (*(refIter - 1) == 'G')))
+        return true;
+    if (((!fwStrand) || (!isStranded)) && (*(refIter - 2) == 'A' && (*(refIter - 1) == 'C')))
+        return true;
+    return false;
+}
+
+/// \brief test whether the two reference bases starting at \p refIter form a splice donor motif
+/// ("GT" is accepted when fwStrand or !isStranded, "CT" when !fwStrand or !isStranded)
+template <typename SymIter>
+bool
+isDownstreamSpliceDonor(
+    SymIter refIter,
+    const SymIter refEnd,
+    bool fwStrand,
+    bool isStranded)
+{
+    if (std::distance(refIter,refEnd)<2) return false; // fewer than two bases remain from refIter, so no motif can be read
+    if (((fwStrand) || (!isStranded)) && (*(refIter) == 'G' && (*(refIter + 1) == 'T')))
+        return true;
+    if (((!fwStrand) || (!isStranded)) && (*(refIter) == 'C' && (*(refIter + 1) == 'T')))
+        return true;
+    return false;
+}
+
+/// fill the score/pointer tables for ref1 and then ref2 (match, insert, delete, jump and intron states), then backtrace into \p result
+template <typename ScoreType>
+template <typename SymIter>
+void
+GlobalJumpIntronAligner<ScoreType>::
+align(
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter ref1Begin, const SymIter ref1End,
+    const SymIter ref2Begin, const SymIter ref2End,
+    bool ref1Fw, bool ref2Fw, bool isStranded,
+    JumpAlignmentResult<ScoreType>& result) const
+{
+    result.clear();
+
+    const AlignmentScores<ScoreType>& scores(this->getScores());
+
+    const size_t querySize(std::distance(queryBegin, queryEnd));
+    const size_t ref1Size(std::distance(ref1Begin, ref1End));
+    const size_t ref2Size(std::distance(ref2Begin, ref2End));
+
+    assert(0 != querySize);
+    assert(0 != ref1Size);
+    assert(0 != ref2Size);
+
+    _score1.resize(querySize+1);
+    _score2.resize(querySize+1);
+    _ptrMat1.resize(querySize+1, ref1Size+1);
+    _ptrMat2.resize(querySize+1, ref2Size+1);
+
+    ScoreVec* thisSV(&_score1);
+    ScoreVec* prevSV(&_score2);
+
+    static const ScoreType badVal(-10000); // score assigned to states/transitions which are disallowed
+
+    // global alignment of query
+    //
+    // disallow start from the delete state, control start from insert state with flag
+    //
+    // query can 'fall-off' the end of a short reference, in which case it will
+    // be soft-clipped and each base off the end will be scored as offEdge
+    //
+    for (unsigned queryIndex(0); queryIndex<=querySize; queryIndex++)
+    {
+        PtrVal& headPtr1(_ptrMat1.val(queryIndex,0));
+        PtrVal& headPtr2(_ptrMat2.val(queryIndex,0));
+        ScoreVal& val((*thisSV)[queryIndex]);
+        headPtr1.match = AlignState::MATCH;
+        headPtr2.match = AlignState::MATCH;
+        val.match = queryIndex * scores.offEdge;
+        headPtr1.del = AlignState::MATCH;
+        headPtr2.del = AlignState::MATCH;
+        val.del = badVal;
+        headPtr1.ins = AlignState::MATCH;
+        headPtr2.ins = AlignState::MATCH;
+        val.ins = badVal;
+        headPtr1.jump = AlignState::MATCH;
+        headPtr2.jump = AlignState::MATCH;
+        val.jump = badVal;
+        headPtr1.intron = AlignState::MATCH;
+        headPtr2.intron = AlignState::MATCH;
+        val.intron = queryIndex * _intronOffEdgeScore;
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    // store full matrix of scores to print out later, don't turn this debug option on for large references!
+    std::vector<ScoreVec> storeScores;
+
+    storeScores.push_back(*thisSV);
+#endif
+
+    BackTrace<ScoreType> btrace;
+
+    {
+        unsigned ref1Index(0);
+        for (SymIter ref1Iter(ref1Begin); ref1Iter != ref1End; ++ref1Iter, ++ref1Index)
+        {
+            std::swap(thisSV,prevSV); // only two score columns are kept: prevSV now holds the previous reference position
+
+            {
+                // disallow start from the insert or delete state:
+                PtrVal& headPtr(_ptrMat1.val(0,ref1Index+1));
+                ScoreVal& val((*thisSV)[0]);
+                headPtr.match = AlignState::MATCH;
+                val.match = 0;
+                headPtr.del = AlignState::MATCH;
+                val.del = badVal;
+                headPtr.ins = AlignState::MATCH;
+                val.ins = badVal;
+                headPtr.jump = AlignState::MATCH;
+                val.jump = badVal;
+                headPtr.intron = AlignState::MATCH;
+                val.intron = badVal;
+            }
+
+            unsigned queryIndex(0);
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                // update match
+                ScoreVal& headScore((*thisSV)[queryIndex+1]);
+                PtrVal& headPtr(_ptrMat1.val(queryIndex+1,ref1Index+1));
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex]);
+                    headPtr.match = this->max3(
+                                        headScore.match,
+                                        sval.match,
+                                        sval.del,
+                                        sval.ins);
+                    // Only can leave the intron (splice) state if the last two
+                    // bases of the intron match the motif
+                    if (isUpstreamSpliceAcceptor(ref1Begin,ref1Iter, ref1Fw, isStranded))
+                    {
+                        if (headScore.match < sval.intron)
+                        {
+                            headScore.match = sval.intron;
+                            headPtr.match = AlignState::SPLICE;
+                        }
+                    }
+                    headScore.match += ((*queryIter==*ref1Iter) ? scores.match : scores.mismatch);
+                }
+
+                // update delete
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.del = this->max3(
+                                      headScore.del,
+                                      sval.match + scores.open,
+                                      sval.del,
+                                      sval.ins + scores.open);
+
+                    headScore.del += scores.extend;
+                    if (0==queryIndex) headScore.del = badVal;
+                }
+
+                // update insert
+                {
+                    const ScoreVal& sval((*thisSV)[queryIndex]);
+                    headPtr.ins = this->max3(
+                                      headScore.ins,
+                                      sval.match + scores.open,
+                                      badVal, // disallow D->I (but I->D is allowed above)
+                                      sval.ins);
+
+                    headScore.ins += scores.extend;
+                    if (0==queryIndex) headScore.ins = badVal;
+                }
+                // update intron / splice
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.intron = AlignState::SPLICE;
+                    headScore.intron = sval.intron;
+                    // Only can enter the intron (splice) state if the first two
+                    // bases of the intron match the motif
+                    if (isDownstreamSpliceDonor(ref1Iter, ref1End, ref1Fw, isStranded))
+                    {
+                        if (sval.match + _intronOpenScore > sval.intron)
+                        {
+                            headScore.intron = sval.match + _intronOpenScore;
+                            headPtr.intron = AlignState::MATCH;
+                        }
+                    }
+                }
+                // update jump (candidates: this cell's match or insert plus the jump score, or the previous ref1 position's jump score)
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.jump = this->max4(
+                                       headScore.jump,
+                                       headScore.match + this->getJumpScore(),
+                                       badVal,
+                                       headScore.ins + this->getJumpScore(),
+                                       sval.jump);
+                }
+
+#ifdef DEBUG_ALN
+                log_os << "queryIdx refIdx ref1Idx: " << queryIndex+1 << " " << ref1Index+1 << " " << ref1Index+1 << "\n";
+                log_os << "MIDJS: "<< headScore.match << ":" << headScore.ins << ":" << headScore.del << ":" << headScore.jump << ":" << headScore.intron << "/"
+                       << static_cast<int>(headPtr.match) << static_cast<int>(headPtr.ins) << static_cast<int>(headPtr.del) << static_cast<int>(headPtr.jump)  << static_cast<int>(headPtr.intron) << "\n";
+                log_os << "QuerySymbol:" << *queryIter << " RefSymbol:" << *ref1Iter << "\n";
+#endif
+            }
+#ifdef DEBUG_ALN
+            log_os << "\n";
+#endif
+
+#ifdef DEBUG_ALN_MATRIX
+            storeScores.push_back(*thisSV);
+#endif
+
+            // get backtrace info (the match score of the complete query at this ref1 position is passed to updateBacktrace):
+            {
+                const ScoreVal& sval((*thisSV)[querySize]);
+                updateBacktrace(sval.match, ref1Index+1, querySize, btrace);
+            }
+        }
+    }
+
+    // in the backtrace start search, also allow for the case where the query falls-off the end of the reference:
+    for (unsigned queryIndex(0); queryIndex<querySize; queryIndex++)
+    {
+        const ScoreVal& sval((*thisSV)[queryIndex]);
+        const ScoreType maxMatch(sval.match + (querySize - queryIndex) * scores.offEdge);
+        const ScoreType maxIntronOffEdge(sval.intron + (querySize - queryIndex) * _intronOffEdgeScore);
+        if (maxMatch >= maxIntronOffEdge)
+        {
+            updateBacktrace(maxMatch, ref1Size, queryIndex, btrace, AlignState::MATCH);
+        }
+        else
+        {
+            updateBacktrace(maxIntronOffEdge, ref1Size, queryIndex, btrace, AlignState::SPLICE);
+        }
+    }
+
+    // second pass (ref2): reset match/del/ins/intron in the current score column, keeping the jump scores from the end of ref1
+    {
+        for (unsigned queryIndex(0); queryIndex<=querySize; queryIndex++)
+        {
+            ScoreVal& val((*thisSV)[queryIndex]);
+            val.match = queryIndex * scores.offEdge;
+            val.del = badVal;
+            val.ins = badVal;
+            val.intron = queryIndex * _intronOffEdgeScore;
+            //val.jump = badVal; // preserve jump setting from last iteration of ref1
+        }
+
+#ifdef DEBUG_ALN_MATRIX
+        storeScores.push_back(*thisSV);
+#endif
+
+        unsigned ref2Index(0);
+        for (SymIter ref2Iter(ref2Begin); ref2Iter != ref2End; ++ref2Iter, ++ref2Index)
+        {
+            std::swap(thisSV,prevSV); // prevSV now holds the previous ref2 position (or the reset column on the first iteration)
+
+            {
+                // disallow start from the insert or delete state:
+                PtrVal& headPtr(_ptrMat2.val(0,ref2Index+1));
+                ScoreVal& val((*thisSV)[0]);
+                headPtr.match = AlignState::MATCH;
+                val.match = 0;
+                headPtr.del = AlignState::MATCH;
+                val.del = badVal;
+                headPtr.ins = AlignState::MATCH;
+                val.ins = badVal;
+                headPtr.jump = AlignState::MATCH;
+                val.jump = badVal;
+                headPtr.intron = AlignState::MATCH;
+                val.intron = badVal;
+            }
+
+            unsigned queryIndex(0);
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                // update match (unlike the ref1 pass, the jump state is an additional candidate predecessor here)
+                ScoreVal& headScore((*thisSV)[queryIndex+1]);
+                PtrVal& headPtr(_ptrMat2.val(queryIndex+1,ref2Index+1));
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex]);
+                    headPtr.match = this->max4(
+                                        headScore.match,
+                                        sval.match,
+                                        sval.del,
+                                        sval.ins,
+                                        sval.jump);
+                    // Only can leave the intron (splice) state if the last two
+                    // bases of the intron match the motif
+                    if (isUpstreamSpliceAcceptor(ref2Begin,ref2Iter, ref2Fw, isStranded))
+                    {
+                        if (headScore.match < sval.intron)
+                        {
+                            headScore.match = sval.intron;
+                            headPtr.match = AlignState::SPLICE;
+                        }
+                    }
+
+                    headScore.match += ((*queryIter==*ref2Iter) ? scores.match : scores.mismatch);
+                }
+
+                // update delete
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.del = this->max3(
+                                      headScore.del,
+                                      sval.match + scores.open,
+                                      sval.del,
+                                      sval.ins + scores.open);
+
+                    headScore.del += scores.extend;
+                }
+
+                // update insert
+                {
+                    const ScoreVal& sval((*thisSV)[queryIndex]);
+                    headPtr.ins = this->max4(
+                                      headScore.ins,
+                                      sval.match + scores.open,
+                                      badVal, // disallow D->I (but I->D is allowed above)
+                                      sval.ins,
+                                      sval.jump); // jump->ins moves get a pass on the gap-open penalty, to support micro-insertions
+
+                    headScore.ins += scores.extend;
+                }
+                // update intron
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.intron = AlignState::SPLICE;
+                    headScore.intron = sval.intron;
+                    // Only can enter the intron (splice) state if the first two
+                    // bases of the intron match the motif
+                    if (isDownstreamSpliceDonor(ref2Iter, ref2End, ref2Fw, isStranded))
+                    {
+                        if (sval.match + _intronOpenScore > sval.intron)
+                        {
+                            headScore.intron = sval.match + _intronOpenScore;
+                            headPtr.intron = AlignState::MATCH;
+                        }
+                    }
+                }
+                // update jump (in the ref2 pass the jump score is only carried forward from the previous ref2 position)
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.jump = AlignState::JUMP;
+                    headScore.jump = sval.jump;
+                }
+
+#ifdef DEBUG_ALN
+                log_os << "queryIdx refIdx ref2Idx: " << queryIndex+1 << " " << ref1Size+ref2Index+1 << " " << ref2Index+1 << "\n";
+                log_os << "MIDJS: "<< headScore.match << ":" << headScore.ins << ":" << headScore.del << ":" << headScore.jump << ":" << headScore.intron << "/"
+                       << static_cast<int>(headPtr.match) << static_cast<int>(headPtr.ins) << static_cast<int>(headPtr.del) << static_cast<int>(headPtr.jump)  << static_cast<int>(headPtr.intron) << "\n";
+                log_os << "QuerySymbol:" << *queryIter << " RefSymbol:" << *ref2Iter << "\n";
+#endif
+            }
+#ifdef DEBUG_ALN
+            log_os << "\n";
+#endif
+
+#ifdef DEBUG_ALN_MATRIX
+            storeScores.push_back(*thisSV);
+#endif
+
+            // get backtrace start info (ref2 positions are offset by ref1Size):
+            {
+                const ScoreVal& sval((*thisSV)[querySize]);
+                updateBacktrace(sval.match, ref1Size+ref2Index+1, querySize, btrace);
+            }
+        }
+    }
+
+    // in the backtrace start search, also allow for the case where the query falls-off the end of the reference:
+    for (unsigned queryIndex(0); queryIndex<querySize; queryIndex++)
+    {
+        const ScoreVal& sval((*thisSV)[queryIndex]);
+        const ScoreType maxMatch(sval.match + (querySize - queryIndex) * scores.offEdge);
+        const ScoreType maxIntronOffEdge(sval.intron + (querySize - queryIndex) * _intronOffEdgeScore);
+        if (maxMatch >= maxIntronOffEdge)
+        {
+            updateBacktrace(maxMatch, ref1Size + ref2Size, queryIndex, btrace, AlignState::MATCH);
+        }
+        else
+        {
+            updateBacktrace(maxIntronOffEdge, ref1Size + ref2Size, queryIndex, btrace, AlignState::SPLICE);
+        }
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    std::vector<AlignState::index_t> dumpStates {AlignState::MATCH, AlignState::DELETE, AlignState::INSERT, AlignState::JUMP, AlignState::SPLICE};
+    this->dumpTables(queryBegin, queryEnd,
+                     ref1Begin, ref1End,
+                     ref2Begin, ref2End,
+                     querySize,
+                     _ptrMat1, _ptrMat2,
+                     dumpStates,storeScores);
+#endif
+
+    this->backTraceAlignment(
+        queryBegin, queryEnd,
+        ref1Begin, ref1End,
+        ref2Begin, ref2End,
+        querySize, ref1Size, ref2Size,
+        _ptrMat1, _ptrMat2,
+        btrace, result);
+}
+
diff --git a/src/c++/lib/alignment/GlobalLargeIndelAligner.hh b/src/c++/lib/alignment/GlobalLargeIndelAligner.hh
new file mode 100644
index 0000000..c80734d
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalLargeIndelAligner.hh
@@ -0,0 +1,183 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "SingleRefAlignerShared.hh"
+
+
+/// \brief align a contig to reference and allow very large insert or deletion events
+///
+/// this is essentially a regular global alignment with affine gap penalties for small
+/// indels and a non-affine gap state for large indels, the open cost for large indels
+/// is (typically) much higher but the extension cost is zero.
+///
+/// transition from insert to delete is free and allowed
+/// transition from jumpDel to insert is free and allowed
+/// transition from jumpIns to delete is free and allowed
+///
+template <typename ScoreType>
+struct GlobalLargeIndelAligner : public SingleRefAlignerBase<ScoreType>
+{
+    /// \param largeIndelScore is the 'gap open' for the large indels
+    ///
+    GlobalLargeIndelAligner(
+        const AlignmentScores<ScoreType>& scores,
+        const ScoreType largeIndelScore) :
+        SingleRefAlignerBase<ScoreType>(scores),
+        _largeIndelScore(largeIndelScore)
+    {}
+
+    /// aligns query to the reference and writes the alignment path of query to reference into \p result
+    template <typename SymIter>
+    void
+    align(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter refBegin, const SymIter refEnd,
+        AlignmentResult<ScoreType>& result) const;
+
+private:
+
+    // scores of all alignment states for one matrix cell; insert and delete are for seq1 wrt seq2
+    struct ScoreVal
+    {
+        ScoreType
+        getScore(const AlignState::index_t i) const // returns the score stored for state i
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::INSERT:
+                return ins;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::JUMP: // the JUMP index maps to the large deletion (jumpDel) value
+                return jumpDel;
+            case AlignState::JUMPINS: // the JUMPINS index maps to the large insertion (jumpIns) value
+                return jumpIns;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0;
+            }
+        }
+
+        ScoreType match;
+        ScoreType ins;
+        ScoreType del;
+        ScoreType jumpIns;
+        ScoreType jumpDel;
+    };
+
+    struct PtrVal
+    {
+        typedef uint16_t code_t;
+
+        /// for state i, return the previous-state code stored for this cell,
+        /// to use during the backtrace
+        AlignState::index_t
+        getStatePtr(const AlignState::index_t i) const
+        {
+            return static_cast<AlignState::index_t>(getStateCode(i));
+        }
+
+    private:
+        code_t
+        getStateCode(const AlignState::index_t i) const // raw bit-field value stored for state i
+        {
+            switch (i)
+            {
+            case AlignState::MATCH:
+                return match;
+            case AlignState::INSERT:
+                return ins;
+            case AlignState::DELETE:
+                return del;
+            case AlignState::JUMP:
+                return jumpDel;
+            case AlignState::JUMPINS:
+                return jumpIns;
+            default:
+                assert(false && "Unexpected Index Value");
+                return 0;
+            }
+        }
+
+    public:
+        /// pack five 3-bit state codes (15 bits in total) into 2 bytes:
+        code_t match : 3;
+        code_t ins : 3;
+        code_t del : 3;
+        code_t jumpDel : 3;
+        code_t jumpIns : 3;
+    };
+
+    /// set \p max to the largest of v0..v4 and return the index (0-4) of that value; on ties the lowest index wins
+    static
+    uint8_t
+    max5(
+        ScoreType& max,
+        const ScoreType v0,
+        const ScoreType v1,
+        const ScoreType v2,
+        const ScoreType v3,
+        const ScoreType v4)
+    {
+        max=v0;
+        uint8_t ptr=0;
+        if (v1>max) // strict comparison: an equal later value does not replace an earlier one
+        {
+            max=v1;
+            ptr=1;
+        }
+        if (v2>max)
+        {
+            max=v2;
+            ptr=2;
+        }
+        if (v3>max)
+        {
+            max=v3;
+            ptr=3;
+        }
+        if (v4>max)
+        {
+            max=v4;
+            ptr=4;
+        }
+        return ptr;
+    }
+
+    // add the matrices here to reduce allocations over many alignment calls (mutable so that the const align() can modify them):
+    typedef std::vector<ScoreVal> ScoreVec;
+    mutable ScoreVec _score1;
+    mutable ScoreVec _score2;
+
+    typedef basic_matrix<PtrVal> PtrMat;
+    mutable PtrMat _ptrMat;
+
+    const ScoreType _largeIndelScore; ///< 'gap open' score for the large indels, as passed to the constructor
+};
+
+
+#include "alignment/GlobalLargeIndelAlignerImpl.hh"
diff --git a/src/c++/lib/alignment/GlobalLargeIndelAlignerImpl.hh b/src/c++/lib/alignment/GlobalLargeIndelAlignerImpl.hh
new file mode 100644
index 0000000..bb27d7f
--- /dev/null
+++ b/src/c++/lib/alignment/GlobalLargeIndelAlignerImpl.hh
@@ -0,0 +1,267 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+//
+// \author Chris Saunders
+//
+
+#include <cassert>
+
+#ifdef DEBUG_ALN
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+
+
+template <typename ScoreType>
+template <typename SymIter>
+void
+GlobalLargeIndelAligner<ScoreType>::
+align(  // score query against ref over five states (match/del/ins/jumpDel/jumpIns), then backtrace into 'result'
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter refBegin, const SymIter refEnd,
+    AlignmentResult<ScoreType>& result) const
+{
+    result.clear();
+
+    const AlignmentScores<ScoreType>& scores(this->getScores());
+
+    const size_t querySize(std::distance(queryBegin, queryEnd));
+    const size_t refSize(std::distance(refBegin, refEnd));
+
+    assert(0 != querySize);
+    assert(0 != refSize);
+
+    _score1.resize(querySize+1);
+    _score2.resize(querySize+1);
+    _ptrMat.resize(querySize+1, refSize+1);
+
+    ScoreVec* thisSV(&_score1);  // only two score vectors are kept and swapped per ref position;
+    ScoreVec* prevSV(&_score2);  // the full pointer matrix (_ptrMat) is what the backtrace reads
+
+    static const ScoreType badVal(-10000);  // score assigned to states which are not allowed
+
+    // global alignment of query
+    //
+    // disallow start from the delete state, control start from insert state with flag
+    //
+    // query can 'fall-off' the end of a short reference, in which case it will
+    // be soft-clipped and each base off the end will be scored as offEdge
+    //
+    for (unsigned queryIndex(0); queryIndex<=querySize; queryIndex++)
+    {
+        PtrVal& headPtr(_ptrMat.val(queryIndex,0));
+        ScoreVal& val((*thisSV)[queryIndex]);
+        headPtr.match = AlignState::MATCH;
+        val.match = queryIndex * scores.offEdge;
+        headPtr.del = AlignState::MATCH;
+        val.del = badVal;
+        if (not scores.isAllowEdgeInsertion)
+        {
+            headPtr.ins = AlignState::MATCH;
+            val.ins = badVal;
+        }
+        else
+        {
+            headPtr.ins = AlignState::INSERT;
+            val.ins = scores.open + (queryIndex * scores.extend);
+        }
+        headPtr.jumpDel = AlignState::MATCH;
+        val.jumpDel = badVal;
+        headPtr.jumpIns = AlignState::MATCH;
+        val.jumpIns = badVal;
+    }
+
+
+#ifdef DEBUG_ALN_MATRIX
+    // store full matrix of scores to print out later, don't turn this debug option on for large references!
+    std::vector<ScoreVec> storeScores;
+
+    storeScores.push_back(*thisSV);
+#endif
+
+    BackTrace<ScoreType> btrace;
+
+    {
+        unsigned refIndex(0);
+        for (SymIter refIter(refBegin); refIter != refEnd; ++refIter, ++refIndex)
+        {
+            std::swap(thisSV,prevSV);  // scores from the previous ref position are now read through prevSV
+
+            {
+                // disallow start from the insert or delete states:
+                PtrVal& headPtr(_ptrMat.val(0,refIndex+1));
+                ScoreVal& val((*thisSV)[0]);
+                headPtr.match = AlignState::MATCH;
+                val.match = 0;
+                headPtr.del = AlignState::MATCH;
+                val.del = badVal;
+                headPtr.ins = AlignState::MATCH;
+                val.ins = badVal;
+                headPtr.jumpDel = AlignState::MATCH;
+                val.jumpDel = badVal;
+                headPtr.jumpIns = AlignState::MATCH;
+                val.jumpIns = badVal;
+            }
+
+            unsigned queryIndex(0);
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                // update match
+                ScoreVal& headScore((*thisSV)[queryIndex+1]);
+                PtrVal& headPtr(_ptrMat.val(queryIndex+1,refIndex+1));
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex]);
+                    headPtr.match = this->max5(
+                                        headScore.match,
+                                        sval.match,
+                                        sval.del,
+                                        sval.ins,
+                                        sval.jumpDel,
+                                        sval.jumpIns);
+
+                    headScore.match += ((*queryIter==*refIter) ? scores.match : scores.mismatch);
+                }
+
+                // update delete
+                {
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.del = this->max5(
+                                      headScore.del,
+                                      sval.match + scores.open,
+                                      sval.del,
+                                      sval.ins,
+                                      badVal,
+                                      sval.jumpIns);
+
+                    headScore.del += scores.extend;
+                    if (0==queryIndex) headScore.del = badVal;
+                }
+
+                // update insert
+                {
+                    const ScoreVal& sval((*thisSV)[queryIndex]);
+                    headPtr.ins = this->max5(
+                                      headScore.ins,
+                                      sval.match + scores.open,
+                                      badVal,
+                                      sval.ins,
+                                      badVal,
+                                      badVal);
+
+                    headScore.ins += scores.extend;
+                    if (0==queryIndex) headScore.ins = badVal;
+                }
+
+                // update jumpDel
+                {
+                    // you can switch between long ins and delete but only
+                    // by paying the full open penalty again
+                    //
+                    // the switch from short ins to jump del is meant to simulate
+                    // a free transition from jump del to short ins, but makes
+                    // the cigar I->D order come out the same as other aligners in this library
+                    const ScoreVal& sval((*prevSV)[queryIndex+1]);
+                    headPtr.jumpDel = this->max5(
+                                          headScore.jumpDel,
+                                          sval.match + _largeIndelScore,
+                                          badVal,
+                                          sval.ins + _largeIndelScore - scores.open,
+                                          sval.jumpDel,
+                                          sval.jumpIns + _largeIndelScore);
+
+                    if (0==queryIndex) headScore.jumpDel = badVal;
+                }
+
+                // update jumpIns
+                {
+                    const ScoreVal& sval((*thisSV)[queryIndex]);
+                    headPtr.jumpIns = this->max5(
+                                          headScore.jumpIns,
+                                          sval.match + _largeIndelScore,
+                                          badVal,
+                                          badVal,
+                                          badVal,
+                                          sval.jumpIns);
+
+                    if (0==queryIndex) headScore.jumpIns = badVal;
+                }
+
+#ifdef DEBUG_ALN
+                log_os << "queryIdx refIdx: " << queryIndex+1 << " " << refIndex+1 << "\n";
+                log_os << headScore.match << ":"
+                       << headScore.del << ":"
+                       << headScore.ins << ":"
+                       << headScore.jumpDel << ":"
+                       << headScore.jumpIns << "/"
+                       << static_cast<int>(headPtr.match)
+                       << static_cast<int>(headPtr.del)
+                       << static_cast<int>(headPtr.ins)
+                       << static_cast<int>(headPtr.jumpDel)
+                       << static_cast<int>(headPtr.jumpIns)<< "\n";
+#endif
+            }
+#ifdef DEBUG_ALN
+            log_os << "\n";
+#endif
+
+#ifdef DEBUG_ALN_MATRIX
+            storeScores.push_back(*thisSV);
+#endif
+
+            // get backtrace info: offer the match score at the end of the query for this ref position as a candidate start
+            {
+                const ScoreVal& sval((*thisSV)[querySize]);
+                updateBacktrace(sval.match, refIndex+1, querySize, btrace);
+            }
+        }
+    }
+
+    // optionally allow for trailing insertion
+    if (scores.isAllowEdgeInsertion)
+    {
+        const ScoreVal& sval((*thisSV)[querySize]);
+        updateBacktrace(sval.ins,refSize,querySize,btrace, AlignState::INSERT);
+    }
+
+    // in the backtrace start search, also allow for the case where the query falls-off the end of the reference:
+    for (unsigned queryIndex(0); queryIndex<=querySize; queryIndex++)
+    {
+        const ScoreVal& sval((*thisSV)[queryIndex]);
+        const ScoreType thisMax(sval.match + (querySize-queryIndex) * scores.offEdge);
+        updateBacktrace(thisMax, refSize, queryIndex, btrace);
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    std::vector<AlignState::index_t> dumpStates {AlignState::MATCH, AlignState::DELETE, AlignState::INSERT, AlignState::JUMP, AlignState::JUMPINS};
+    this->dumpTables(queryBegin, queryEnd,
+                     refBegin, refEnd,
+                     querySize, _ptrMat,
+                     dumpStates, storeScores);
+#endif
+
+    this->backTraceAlignment(
+        queryBegin, queryEnd,
+        refBegin, refEnd,
+        querySize, refSize,
+        _ptrMat,
+        btrace, result);
+}
diff --git a/src/c++/lib/alignment/JumpAlignerBase.hh b/src/c++/lib/alignment/JumpAlignerBase.hh
new file mode 100644
index 0000000..af3867c
--- /dev/null
+++ b/src/c++/lib/alignment/JumpAlignerBase.hh
@@ -0,0 +1,167 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "AlignerBase.hh"
+#include "AlignerUtil.hh"
+#include "Alignment.hh"
+
+#include "blt_util/basic_matrix.hh"
+
+
+/// represents alignment of a query sequence which can switch over from reference1 to reference2
+///
+/// an empty alignment to one reference indicates that the entire alignment is to the other reference
+///
+template <typename ScoreType>
+struct JumpAlignmentResult
+{
+    JumpAlignmentResult()
+    {
+        clear();
+    }
+
+    void
+    clear()  // reset all members to the empty/zero state
+    {
+        score=0;
+        jumpInsertSize=0;
+        jumpRange=0;
+        align1.clear();
+        align2.clear();
+    }
+
+
+    ScoreType score; ///< alignment score (backTraceAlignment sets this from the backtrace maximum)
+    unsigned jumpInsertSize; ///< number of query bases inserted at the jump (accumulated during backtrace)
+    unsigned jumpRange; ///< length of sequence over which jump would have the same score (left-most on align1 is reported)
+    Alignment align1; ///< portion of the alignment on reference1
+    Alignment align2; ///< portion of the alignment on reference2
+};
+
+template <typename ScoreType>  // text dump of a jump alignment result; NOTE(review): parameter is a non-const reference, so const results cannot be streamed
+std::ostream& operator<<(std::ostream& os, JumpAlignmentResult<ScoreType>& alignment);
+
+
+
+/// \brief shared base for aligners which align a contig to two references
+///
+/// the alignment can make a single jump from reference1 to reference2
+///
+template <typename ScoreType>
+struct JumpAlignerBase : public AlignerBase<ScoreType>
+{
+    JumpAlignerBase(
+        const AlignmentScores<ScoreType>& scores,
+        const ScoreType jumpScore) :
+        AlignerBase<ScoreType>(scores),
+        _jumpScore(jumpScore)
+    {}
+
+    /// read-only access to the aligner's jump score:
+    const ScoreType&
+    getJumpScore() const
+    {
+        return _jumpScore;
+    }
+
+protected:
+
+    // backtrace logic shared with the intron jump aligner:
+    template <typename SymIter, typename MatrixType>
+    void
+    backTraceAlignment(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter ref1Begin, const SymIter ref1End,
+        const SymIter ref2Begin, const SymIter ref2End,
+        const size_t querySize, const size_t ref1Size, const size_t ref2Size,
+        const MatrixType& ptrMatrix1, const MatrixType& ptrMatrix2,
+        const BackTrace<ScoreType>& btraceInput,
+        JumpAlignmentResult<ScoreType>& result) const;
+
+    static
+    uint8_t
+    max4(  // store the largest of v0..v3 in 'max'; return the 0-based position of the winning argument
+        ScoreType& max,
+        const ScoreType v0,
+        const ScoreType v1,
+        const ScoreType v2,
+        const ScoreType v3)
+    {
+        max=v0;
+        uint8_t ptr=0;
+        if (v1>max)  // strict '>' in every test: on a tie the earlier argument wins
+        {
+            max=v1;
+            ptr=1;
+        }
+        if (v2>max)
+        {
+            max=v2;
+            ptr=2;
+        }
+        if (v3>max)
+        {
+            max=v3;
+            ptr=3;
+        }
+        return ptr;
+    }
+
+#ifdef DEBUG_ALN_MATRIX
+    /// write out matrix of scores and back-trace pointers for debug:
+    template <typename SymIter, typename MatrixType, typename ScoreValType>
+    void
+    dumpTables(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter ref1Begin, const SymIter ref1End,
+        const SymIter ref2Begin, const SymIter ref2End,
+        const size_t querySize,
+        const MatrixType& ptrMatrix1, const MatrixType& ptrMatrix2,
+        const std::vector<AlignState::index_t>& dumpStates,
+        const std::vector<std::vector<ScoreValType>>& storeScores) const;
+#endif
+
+    const ScoreType _jumpScore;  // jump score supplied at construction, exposed via getJumpScore()
+};
+
+
+#include "alignment/JumpAlignerBaseImpl.hh"
+
+
+/// Convenience function to run alignment from multiple aligner classes (forwards all arguments to jumpAligner.align()):
+template <typename JumpAligner, typename SymIter, typename ScoreType>
+void
+jumpAlign(
+    const JumpAligner& jumpAligner,
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter ref1Begin, const SymIter ref1End,
+    const SymIter ref2Begin, const SymIter ref2End,
+    JumpAlignmentResult<ScoreType>& result)
+{
+    jumpAligner.align(queryBegin,queryEnd,ref1Begin,ref1End,ref2Begin,ref2End,result);
+}
+
+
diff --git a/src/c++/lib/alignment/JumpAlignerBaseImpl.hh b/src/c++/lib/alignment/JumpAlignerBaseImpl.hh
new file mode 100644
index 0000000..676178f
--- /dev/null
+++ b/src/c++/lib/alignment/JumpAlignerBaseImpl.hh
@@ -0,0 +1,255 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+//
+// \author Chris Saunders
+//
+
+#include <cassert>
+
+#include <iostream>
+
+#if defined(DEBUG_ALN) || defined(DEBUG_ALN_MATRIX)
+#include "blt_util/log.hh"
+#endif
+
+
+#ifdef DEBUG_ALN_MATRIX
+template <typename ScoreType>
+template <typename SymIter, typename MatrixType, typename ScoreValType>
+void
+JumpAlignerBase<ScoreType>::
+dumpTables(  // for each state in dumpStates, log a header row of query symbols, then the ref1 and ref2 tables
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter ref1Begin, const SymIter ref1End,
+    const SymIter ref2Begin, const SymIter ref2End,
+    const size_t querySize,
+    const MatrixType& ptrMatrix1, const MatrixType& ptrMatrix2,
+    const std::vector<AlignState::index_t>& dumpStates,
+    const std::vector<std::vector<ScoreValType>>& storeScores) const
+{
+    const unsigned stateSize(dumpStates.size());
+    for (unsigned stateIndex(0); stateIndex<stateSize; ++stateIndex)
+    {
+        const AlignState::index_t sIndex(dumpStates[stateIndex]);
+        log_os << "******** Dumping matrix for state: " << AlignState::label(sIndex) << " ********\n";
+        {
+            unsigned queryIndex(0);
+            log_os << "REF    -";
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                log_os << "   " << *queryIter;
+            }
+            log_os << "\n";
+        }
+
+        // dump state before refIndex 0
+        unsigned storeIndex(0);  // NOTE(review): presumably advanced by dumpSingleRefTable as it walks storeScores — confirm
+        this->dumpSingleRefTable(ref1Begin, ref1End, querySize, ptrMatrix1, storeScores, '1', sIndex, storeIndex, log_os);
+
+        storeIndex++;
+        this->dumpSingleRefTable(ref2Begin, ref2End, querySize, ptrMatrix2, storeScores, '2', sIndex, storeIndex, log_os);
+    }
+}
+#endif
+
+
+
+template <typename ScoreType>
+std::ostream&
+operator<<(std::ostream& os, JumpAlignmentResult<ScoreType>& alignment)  // multi-line text dump of every result member
+{
+    os << "JumpAligner: score: " << alignment.score << "\n"
+       << "\talign1: " << alignment.align1 << "\n"
+       << "\talign2: " << alignment.align2 << "\n"
+       << "\tjumpInsertSize " << alignment.jumpInsertSize << "\n"
+       << "\tjumpRange " << alignment.jumpRange << "\n";
+    return os;  // returned so that calls can be chained
+}
+
+
+
+template <typename ScoreType>
+template <typename SymIter, typename MatrixType>
+void
+JumpAlignerBase<ScoreType>::
+backTraceAlignment(  // walk the pointer matrices back from btraceInput and fill result's score, paths, begin positions and jump fields
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter ref1Begin, const SymIter ref1End,
+    const SymIter ref2Begin, const SymIter ref2End,
+    const size_t querySize, const size_t ref1Size, const size_t ref2Size,
+    const MatrixType& ptrMatrix1, const MatrixType& ptrMatrix2,
+    const BackTrace<ScoreType>& btraceInput,
+    JumpAlignmentResult<ScoreType>& result) const
+{
+    BackTrace<ScoreType> btrace(btraceInput);  // local copy: the walk below modifies queryBegin/refBegin/state
+
+    assert(btrace.isInit);
+    assert(btrace.refBegin <= ref1Size+ref2Size);
+    assert(btrace.queryBegin <= querySize);
+
+#ifdef DEBUG_ALN
+    log_os << "qSize: " << querySize << " ref1Size: " << ref1Size << " ref2Size: " << ref2Size << "\n";
+    log_os << "bt-start max: " << btrace.max << " refBegin: " << btrace.refBegin << " qBegin: " << btrace.queryBegin << " state: " << AlignState::label(btrace.state) << "\n";
+#endif
+
+    result.score = btrace.max;
+
+    // traceback:
+    ALIGNPATH::path_t& apath1(result.align1.apath);
+    ALIGNPATH::path_t& apath2(result.align2.apath);
+    ALIGNPATH::path_segment ps;
+
+    // add any trailing soft-clip if we go off the end of the reference:
+    if (btrace.queryBegin < querySize)
+    {
+        ps.type = ALIGNPATH::SOFT_CLIP;
+        ps.length = (querySize-btrace.queryBegin);
+    }
+
+    bool isRef2End(false);  // set when a match/splice step reaches the first position of ref2 without a jump
+
+    while ((btrace.queryBegin>0) && (btrace.refBegin>0))
+    {
+        if (isRef2End) break;
+        const bool isRef1(btrace.refBegin<=ref1Size);  // refBegin is a combined coordinate: values above ref1Size index into ref2
+        ALIGNPATH::path_t& apath( isRef1 ? apath1 : apath2 );
+        const unsigned refXBegin(btrace.refBegin - (isRef1 ? 0 : ref1Size));
+        const MatrixType* ptrMatrixX(isRef1 ? &ptrMatrix1 : &ptrMatrix2 );
+        const AlignState::index_t nextState(ptrMatrixX->val(btrace.queryBegin,refXBegin).getStatePtr(btrace.state));
+
+#ifdef DEBUG_ALN
+        log_os << "bt-iter queryIndex: " << btrace.queryBegin
+               << " refIndex: " << btrace.refBegin
+               << " state: " << AlignState::label(btrace.state)
+               << " next: " << AlignState::label(nextState)
+               << "\n";
+        log_os << "\tisref1: " << isRef1 << " refXBegin: " << refXBegin << "\n";
+#endif
+
+        if      (btrace.state==AlignState::MATCH)
+        {
+            if ((!isRef1) && (refXBegin==1) && (nextState==AlignState::MATCH)) isRef2End=true;
+
+            AlignerUtil::updatePath(apath,ps,ALIGNPATH::MATCH);
+            btrace.queryBegin--;
+            btrace.refBegin--;
+        }
+        else if (btrace.state==AlignState::DELETE)
+        {
+            AlignerUtil::updatePath(apath,ps,ALIGNPATH::DELETE);
+            btrace.refBegin--;
+        }
+        else if (btrace.state==AlignState::SPLICE)
+        {
+            if ((!isRef1) && (refXBegin == 1) && (nextState == AlignState::SPLICE)) isRef2End = true;
+
+            AlignerUtil::updatePath(apath,ps,ALIGNPATH::SKIP);
+            btrace.refBegin--;
+        }
+        else if (btrace.state==AlignState::INSERT)
+        {
+            AlignerUtil::updatePath(apath,ps,ALIGNPATH::INSERT);
+            btrace.queryBegin--;
+        }
+        else if (btrace.state==AlignState::JUMP)
+        {
+            if (ps.type != ALIGNPATH::NONE)
+            {
+                assert(btrace.refBegin>=ref1Size);
+                result.align2.beginPos = btrace.refBegin-ref1Size;
+                if (ps.type == ALIGNPATH::INSERT)  // an insertion pending at the jump is counted as jumpInsertSize, not added to a path
+                {
+                    result.jumpInsertSize += ps.length;
+                    ps.type = ALIGNPATH::NONE;
+                    ps.length = 0;
+                }
+                else
+                {
+                    AlignerUtil::updatePath(apath2,ps,ALIGNPATH::NONE);
+                }
+            }
+            else
+            {
+                if (nextState == AlignState::JUMP) btrace.refBegin--;
+            }
+        }
+        else
+        {
+            assert(false && "Unknown align state");
+        }
+        btrace.state=nextState;
+        ps.length++;
+    }
+
+    const bool isRef1(btrace.refBegin<ref1Size);  // NOTE(review): '<' here but '<=' inside the loop — presumably refBegin is now a 0-based start position; confirm
+    ALIGNPATH::path_t& apath( isRef1 ? apath1 : apath2 );
+
+    if (ps.type != ALIGNPATH::NONE) apath.push_back(ps);
+
+    // soft-clip beginning of read if we fall off the end of the reference
+    if (btrace.queryBegin!=0)
+    {
+        ps.type = ALIGNPATH::SOFT_CLIP;
+        ps.length = btrace.queryBegin;
+        apath.push_back(ps);
+    }
+
+    if (isRef1)
+    {
+        result.align1.beginPos = btrace.refBegin;
+    }
+    else
+    {
+        result.align2.beginPos = btrace.refBegin-ref1Size;
+    }
+
+    std::reverse(apath1.begin(),apath1.end());  // segments were appended while walking backwards
+    std::reverse(apath2.begin(),apath2.end());
+
+    // figure out jumpRange:
+    if (result.align1.isAligned() && result.align2.isAligned())
+    {
+        // find the distance over which ref1 and ref2 are equal following the start of the breakpoint
+        SymIter ref1JumpIter(ref1Begin + result.align1.beginPos + apath_ref_length(apath1));
+        SymIter ref2JumpIter(ref2Begin + result.align2.beginPos);
+        while (true)
+        {
+            if (ref1JumpIter == ref1End) break;
+            if (ref2JumpIter == ref2End) break;
+            if ((*ref1JumpIter) != (*ref2JumpIter)) break;
+
+            result.jumpRange++;
+            ref1JumpIter++;
+            ref2JumpIter++;
+        }
+    }
+
+    // if true, output final cigars using seq match '=' and mismatch 'X' symbols:
+    static const bool isOutputSeqMatch(true);
+
+    if (isOutputSeqMatch)
+    {
+        apath_add_seqmatch(queryBegin, queryEnd, (ref1Begin+result.align1.beginPos), ref1End, apath1);
+
+        const unsigned queryOffset = apath_read_length(apath1) + result.jumpInsertSize;  // query bases consumed before align2 starts
+        apath_add_seqmatch(queryBegin + queryOffset, queryEnd, (ref2Begin+result.align2.beginPos), ref2End, apath2);
+    }
+}
diff --git a/src/c++/lib/alignment/ReadScorer.cpp b/src/c++/lib/alignment/ReadScorer.cpp
new file mode 100644
index 0000000..b96e553
--- /dev/null
+++ b/src/c++/lib/alignment/ReadScorer.cpp
@@ -0,0 +1,89 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#include "ReadScorer.hh"
+#include "blt_util/math_util.hh"
+
+#include <cassert>
+
+//#define DEBUG_RS
+
+
+#ifdef DEBUG_RS
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+
+
+ReadScorer::
+ReadScorer()  // pre-compute the _logpcorrectratio table for every quality score up to MAX_QSCORE
+{
+#ifdef DEBUG_RS
+    log_os << "Filling logpcorrectratio table" << std::endl;
+#endif
+    // the loop below takes log1p_switch(-eprob) for each q-score, so set a dummy value for i = 0 instead
+    assert(MAX_QSCORE>=0);
+    _logpcorrectratio[0] = 0.99;
+    for (unsigned i(1); i<=MAX_QSCORE; ++i)
+    {
+        const double eprob(qphred_to_error_prob(i));
+#ifdef DEBUG_RS
+        log_os << "i=" << i << " " << eprob << " " << (log1p_switch(-eprob) - std::log(eprob/3.)) << std::endl;
+#endif
+        _logpcorrectratio[i] = log1p_switch(-eprob) - std::log(eprob/3);  // presumably log of P(correct) over P(one specific wrong base) — confirm
+    }
+}
+
+
+
+double
+ReadScorer::
+getSemiAlignedMetricImpl(  // sum getLogRatio(qual) over every soft-clipped or mismatched read position in apath
+    const unsigned readLen,
+    const ALIGNPATH::path_t& apath,
+    const uint8_t* qual) const
+{
+    using namespace ALIGNPATH;
+
+    unsigned posInRead = 0;  // read offset of the first base of the current segment
+    double score(0.);
+
+    for (const path_segment& ps : apath)
+    {
+        assert((ps.type != MATCH) && "Incorrect CIGAR type, matches must be converted to SEQ_MATCH/SEQ_MISMATCH");
+
+        if ((ps.type==SOFT_CLIP) || (ps.type==SEQ_MISMATCH))
+        {
+            assert((posInRead+ps.length) <= readLen);  // readLen is only used for this bounds check on qual
+            for (unsigned j(0); j<ps.length; ++j)
+            {
+                score += getLogRatio(qual[posInRead+j]);
+            }
+        }
+        if (is_segment_type_read_length(ps.type)) posInRead += ps.length;  // advance only for segment types which consume read bases
+    }
+
+    return score;
+}
diff --git a/src/c++/lib/alignment/ReadScorer.hh b/src/c++/lib/alignment/ReadScorer.hh
new file mode 100644
index 0000000..000a78f
--- /dev/null
+++ b/src/c++/lib/alignment/ReadScorer.hh
@@ -0,0 +1,76 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+#include "blt_util/qscore.hh"
+
+#include <boost/utility.hpp>
+
+
+/// provide semi-aligned score metric for an aligned read
+///
+/// note: implemented as singleton class which pre-computes score components
+struct ReadScorer : private boost::noncopyable
+{
+    /// return semi aligned read score given a seq-match/mismatch cigar and qualities
+    ///
+    /// note: readLen is not strictly required but used as a safety check here
+    static
+    double
+    getSemiAlignedMetric(
+        const unsigned readLen,
+        const ALIGNPATH::path_t& apath,
+        const uint8_t* qual)
+    {
+        return get().getSemiAlignedMetricImpl(readLen, apath,qual);
+    }
+
+private:
+    ReadScorer();  // private: the only instance is the function-local static in get()
+
+    /// return singleton instance of ReadScorer
+    static const ReadScorer& get()
+    {
+        static const ReadScorer rs;
+        return rs;
+    }
+
+    double
+    getSemiAlignedMetricImpl(
+        const unsigned readLen,
+        const ALIGNPATH::path_t& apath,
+        const uint8_t* qual) const;
+
+    double
+    getLogRatio(const int qual) const  // table lookup of the pre-computed value for this quality score
+    {
+        qphred_cache::qscore_check(qual, "basecall quality");
+        return _logpcorrectratio[qual];
+    }
+
+    enum { MAX_QSCORE  = qphred_cache::MAX_QSCORE };
+    double _logpcorrectratio[MAX_QSCORE+1];  // indexed by quality score; filled by the constructor
+};
diff --git a/src/c++/lib/alignment/SingleRefAlignerShared.hh b/src/c++/lib/alignment/SingleRefAlignerShared.hh
new file mode 100644
index 0000000..d3c826d
--- /dev/null
+++ b/src/c++/lib/alignment/SingleRefAlignerShared.hh
@@ -0,0 +1,96 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// common boilerplate for single-reference sequence aligners
+
+#pragma once
+
+#include "AlignerBase.hh"
+#include "AlignerUtil.hh"
+#include "Alignment.hh"
+
+#include "blt_util/basic_matrix.hh"
+
+#include <iosfwd>
+
+
+template <typename ScoreType>
+struct AlignmentResult  // score and alignment produced by a single-reference aligner
+{
+    AlignmentResult()
+    {
+        clear();
+    }
+
+    void
+    clear()  // reset to a zero score and an empty alignment
+    {
+        score=0;
+        align.clear();
+    }
+
+    ScoreType score;  // alignment score
+    Alignment align;  // alignment of the query to the reference
+};
+
+
+template <typename ScoreType>  // stream output for AlignmentResult; NOTE(review): parameter is a non-const reference, so const results cannot be streamed
+std::ostream&
+operator<<(std::ostream& os, AlignmentResult<ScoreType>& alignment);
+
+
+
+template <typename ScoreType>
+struct SingleRefAlignerBase : public AlignerBase<ScoreType>  // shared base for aligners of a query to one reference
+{
+    SingleRefAlignerBase(
+        const AlignmentScores<ScoreType>& scores) :
+        AlignerBase<ScoreType>(scores)
+    {}
+
+protected:
+
+    /// returns alignment path of query to reference (written to result, starting from btraceInput)
+    template <typename SymIter, typename MatrixType>
+    void
+    backTraceAlignment(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter refBegin, const SymIter refEnd,
+        const size_t querySize, const size_t refSize,
+        const MatrixType& ptrMatrix,
+        const BackTrace<ScoreType>& btraceInput,
+        AlignmentResult<ScoreType>& result) const;
+
+#ifdef DEBUG_ALN_MATRIX
+    /// write out matrix of scores and back-trace pointers for debug:
+    template <typename SymIter, typename MatrixType, typename ScoreValType>
+    void
+    dumpTables(
+        const SymIter queryBegin, const SymIter queryEnd,
+        const SymIter refBegin, const SymIter refEnd,
+        const size_t querySize,
+        const MatrixType& ptrMatrix,
+        const std::vector<AlignState::index_t>& dumpStates,
+        const std::vector<std::vector<ScoreValType>>& storeScores) const;
+#endif
+};
+
+
+#include "SingleRefAlignerSharedImpl.hh"
diff --git a/src/c++/lib/alignment/SingleRefAlignerSharedImpl.hh b/src/c++/lib/alignment/SingleRefAlignerSharedImpl.hh
new file mode 100644
index 0000000..9a9233a
--- /dev/null
+++ b/src/c++/lib/alignment/SingleRefAlignerSharedImpl.hh
@@ -0,0 +1,171 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include <iostream>
+
+#if defined(DEBUG_ALN) || defined(DEBUG_ALN_MATRIX)
+#include "blt_util/log.hh"
+#endif
+
+
+#ifdef DEBUG_ALN_MATRIX // debug only: for each state in dumpStates, log a banner, a header row of query symbols, then the table
+template <typename ScoreType>
+template <typename SymIter, typename MatrixType, typename ScoreValType>
+void
+SingleRefAlignerBase<ScoreType>::
+dumpTables(
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter refBegin, const SymIter refEnd,
+    const size_t querySize,
+    const MatrixType& ptrMatrix,
+    const std::vector<AlignState::index_t>& dumpStates,
+    const std::vector<std::vector<ScoreValType>>& storeScores) const
+{
+    const unsigned stateSize(dumpStates.size()); // one table is written per requested state
+    for (unsigned stateIndex(0); stateIndex<stateSize; ++stateIndex)
+    {
+        const AlignState::index_t sIndex(dumpStates[stateIndex]);
+        log_os << "******** Dumping matrix for state: " << AlignState::label(sIndex) << " ********\n";
+        {
+            unsigned queryIndex(0); // NOTE(review): incremented in the loop below but never read
+            log_os << "REF    -"; // start of the header row; one column per query symbol follows
+            for (SymIter queryIter(queryBegin); queryIter != queryEnd; ++queryIter, ++queryIndex)
+            {
+                log_os << "   " << *queryIter;
+            }
+            log_os << "\n";
+        }
+        // the table body is written by dumpSingleRefTable (declared outside this file); storeIndex is handed over starting at 0
+        // dump state before refIndex 0
+        unsigned storeIndex(0);
+        this->dumpSingleRefTable(refBegin,refEnd,querySize,ptrMatrix,storeScores, '1', sIndex, storeIndex, log_os);
+    }
+}
+#endif
+
+
+template <typename ScoreType> // print an AlignmentResult as two lines: the score, then the alignment
+std::ostream&
+operator<<(std::ostream& os, AlignmentResult<ScoreType>& alignment)
+{
+    os << "AlignerResult: score: " << alignment.score << "\n"
+       << "\talignment: " << alignment.align << "\n"; // relies on the stream operator of Alignment
+    return os;
+}
+
+
+
+// traceback: follow ptrMatrix back from btraceInput and fill result (score, alignment path, reference start position)
+template <typename ScoreType>
+template <typename SymIter, typename MatrixType>
+void
+SingleRefAlignerBase<ScoreType>::
+backTraceAlignment(
+    const SymIter queryBegin, const SymIter queryEnd,
+    const SymIter refBegin, const SymIter refEnd,
+    const size_t querySize, const size_t refSize,
+    const MatrixType& ptrMatrix,
+    const BackTrace<ScoreType>& btraceInput,
+    AlignmentResult<ScoreType>& result) const
+{
+    BackTrace<ScoreType> btrace(btraceInput); // local copy: the walk below mutates its position and state
+    // the starting cell must be initialized and lie within the query/reference extents
+    assert(btrace.isInit); // NOTE(review): assert needs <cassert>, presumably included transitively -- confirm
+    assert(btrace.refBegin <= refSize);
+    assert(btrace.queryBegin <= querySize);
+
+#ifdef DEBUG_ALN
+    log_os << "qSize: " << querySize << " refSize: " << refSize << "\n";
+    log_os << "bt-start max: " << btrace.max << " refBegin: " << btrace.refBegin << " qBegin: " << btrace.queryBegin << " state: " << AlignState::label(btrace.state) << "\n";
+#endif
+
+    result.score = btrace.max;
+
+    // traceback: segments are collected from the end of the query backwards and reversed once the walk is done
+    ALIGNPATH::path_t& apath(result.align.apath);
+    ALIGNPATH::path_segment ps; // segment currently being extended
+
+    // add any trailing soft-clip if we go off the end of the reference (query bases past the back-trace start):
+    if (btrace.queryBegin < querySize)
+    {
+        ps.type = ALIGNPATH::SOFT_CLIP;
+        ps.length = (querySize-btrace.queryBegin); // not pushed here; presumably AlignerUtil::updatePath flushes ps when the segment type changes -- confirm
+    }
+    // one step per iteration; each state breaks out once the index it would decrement has reached 0
+    while (true)
+    {
+        const AlignState::index_t nextState(ptrMatrix.val(btrace.queryBegin,btrace.refBegin).getStatePtr(btrace.state));
+        // nextState is read at the current cell, before queryBegin/refBegin are decremented below
+#ifdef DEBUG_ALN
+        log_os << "bt-iter queryIndex: " << btrace.queryBegin
+               << " refIndex: " << btrace.refBegin
+               << " state: " << AlignState::label(btrace.state)
+               << " next: " << AlignState::label(nextState)
+               << "\n";
+#endif
+
+        if (btrace.state==AlignState::MATCH)
+        {
+            if ((btrace.queryBegin<1) or (btrace.refBegin<1)) break; // a match step moves back in both query and reference
+            AlignerUtil::updatePath(apath,ps,ALIGNPATH::MATCH);
+            btrace.queryBegin--;
+            btrace.refBegin--;
+        }
+        else if ((btrace.state==AlignState::DELETE) || (btrace.state==AlignState::JUMP))
+        {
+            if (btrace.refBegin<1) break; // DELETE and JUMP move back in the reference only; both are recorded as DELETE
+            AlignerUtil::updatePath(apath,ps,ALIGNPATH::DELETE);
+            btrace.refBegin--;
+        }
+        else if ((btrace.state==AlignState::INSERT) || (btrace.state==AlignState::JUMPINS))
+        {
+            if (btrace.queryBegin<1) break; // INSERT and JUMPINS move back in the query only; both are recorded as INSERT
+            AlignerUtil::updatePath(apath,ps,ALIGNPATH::INSERT);
+            btrace.queryBegin--;
+        }
+        else
+        {
+            assert(false && "Unknown align state");
+        }
+        btrace.state=nextState;
+        ps.length++; // count the step just taken in the current segment
+    }
+
+    if (ps.type != ALIGNPATH::NONE) apath.push_back(ps); // flush the segment that was still open when the loop ended
+
+    // soft-clip beginning of read if we fall off the end of the reference (query bases left unconsumed by the loop)
+    if (btrace.queryBegin!=0)
+    {
+        ps.type = ALIGNPATH::SOFT_CLIP;
+        ps.length = btrace.queryBegin;
+        apath.push_back(ps);
+    }
+
+    result.align.beginPos = btrace.refBegin;
+    std::reverse(apath.begin(),apath.end()); // path was built back-to-front; NOTE(review): std::reverse needs <algorithm>, presumably included transitively -- confirm
+
+    // if true, output final cigars using seq match '=' and mismatch 'X' symbols:
+    static const bool isOutputSeqMatch(true);
+
+    if (isOutputSeqMatch)
+    {
+        apath_add_seqmatch(queryBegin, queryEnd, (refBegin+result.align.beginPos), refEnd, apath); // reference iterator is advanced to the alignment start
+    }
+}
diff --git a/src/c++/lib/alignment/test/AlignmentUtilTest.cpp b/src/c++/lib/alignment/test/AlignmentUtilTest.cpp
new file mode 100644
index 0000000..a37de2b
--- /dev/null
+++ b/src/c++/lib/alignment/test/AlignmentUtilTest.cpp
@@ -0,0 +1,60 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "AlignmentUtil.hh"
+
+#include "blt_util/align_path.hh"
+
+#include <string>
+
+
+
+BOOST_AUTO_TEST_SUITE( test_AlignmnetUtil )
+
+
+
+BOOST_AUTO_TEST_CASE( test_getQuerySegments )
+{
+    const std::string querySeq("AAAACCGGG");
+    // hand-built jump alignment over the 9-base query: 4M on align1, a 2-base jump insertion, 3M on align2
+    JumpAlignmentResult<int> result;
+
+    cigar_to_apath("4M",result.align1.apath);
+    result.jumpInsertSize = 2;
+    cigar_to_apath("3M",result.align2.apath);
+    // note the output argument order in the call below: bp1Seq, bp2Seq, insertSeq
+    std::string bp1Seq,insertSeq,bp2Seq;
+    getFwdStrandQuerySegments(result,querySeq,false,true,false,
+                              bp1Seq,bp2Seq,insertSeq);
+    // presumably the boolean flags cause the first two segments ("AAAA", "CC") to be reverse-complemented -- confirm against getFwdStrandQuerySegments
+    BOOST_REQUIRE_EQUAL(bp1Seq,"TTTT");
+    BOOST_REQUIRE_EQUAL(insertSeq,"GG");
+    BOOST_REQUIRE_EQUAL(bp2Seq,"GGG");
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/alignment/test/CMakeLists.txt b/src/c++/lib/alignment/test/CMakeLists.txt
new file mode 100644
index 0000000..ebb21c8
--- /dev/null
+++ b/src/c++/lib/alignment/test/CMakeLists.txt
@@ -0,0 +1,29 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+set(ADDITIONAL_UNITTEST_LIB manta_blt_util) # extra library for these unit tests; presumably read by the script included below -- confirm
+include(${THIS_CXX_TEST_LIBRARY_CMAKE}) # script path comes from a variable set outside this file
diff --git a/src/c++/lib/alignment/test/GlobalAlignerTest.cpp b/src/c++/lib/alignment/test/GlobalAlignerTest.cpp
new file mode 100644
index 0000000..cb40518
--- /dev/null
+++ b/src/c++/lib/alignment/test/GlobalAlignerTest.cpp
@@ -0,0 +1,227 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "GlobalAligner.hh"
+
+#include "blt_util/align_path.hh"
+
+#include <string>
+
+
+
+BOOST_AUTO_TEST_SUITE( test_GlobalAligner )
+
+typedef short int score_t;
+
+static // test helper: align seq against ref with GlobalAligner using fixed scores and return the result
+AlignmentResult<score_t>
+testAlign(
+    const std::string& seq,
+    const std::string& ref,
+    const int offEdgeScore = -4,
+    const bool isAllowEdgeInsertion = false)
+{
+    AlignmentScores<score_t> scores(2, -4, -5, -1, offEdgeScore, isAllowEdgeInsertion); // first five follow the (match, mismatch, open, extend, offEdge) order used by testAlignScores in GlobalJumpIntronAlignerTest
+    GlobalAligner<score_t> aligner(scores);
+    AlignmentResult<score_t> result;
+    aligner.align(seq.begin(),seq.end(),ref.begin(),ref.end(),result);
+
+    return result;
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalAligner1 )
+{
+    static const std::string seq("D");
+    static const std::string ref("ABCDEF");
+    // one-symbol query: expect a single sequence match at reference offset 3 (the 'D')
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"1=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,3);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerDelete )
+{
+    static const std::string seq("BCDEFHIKLM");
+    static const std::string ref("ABCDEFGHIKLMN");
+    // the query lacks the reference 'G': expect one deleted base between two 5-base matches
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=1D5=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerInsert )
+{
+    static const std::string seq("BCDEFGXHIKLM");
+    static const std::string ref("ABCDEFGHIKLMN");
+    // the query carries an extra 'X' after 'G': expect a 1-base insertion at that point
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"6=1I5=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerInsertDelete )
+{
+    static const std::string seq("BBBBBBCDXYZHIKLMMMM");
+    static const std::string ref("ABBBBBBCDEFGHIKLMMMMN");
+    // the query has "XYZ" where the reference has "EFG": expected as a 3-base insertion followed by a 3-base deletion
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"8=3I3D8=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerInsertDelete2 )
+{
+    static const std::string seq("BBBBBBCDEXYHIKLMMMM");
+    static const std::string ref("ABBBBBBCDEFGHIKLMMMMN");
+    // the query has "XY" where the reference has "FG": a 2-base difference is expected as mismatches ("2X"), not indels
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"9=2X8=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerShortRef1 )
+{
+    static const std::string seq("ABCD");
+    static const std::string ref("BCD");
+    // the query overhangs the reference start by one base: expect a leading soft-clip; score 2 is consistent with 3*2 + (-4)
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"1S3=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.score,2);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerShortRef2 )
+{
+    static const std::string seq("ABCD");
+    static const std::string ref("ABC");
+    // the query overhangs the reference end by one base: expect a trailing soft-clip; score 2 is consistent with 3*2 + (-4)
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"3=1S");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.score,2);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerShortRef3 )
+{
+    static const std::string seq("ABCD");
+    static const std::string ref("B");
+    // the query overhangs a 1-base reference on both sides; score -10 is consistent with 2 + 3*(-4), i.e. off-edge charged per clipped base
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"1S1=2S");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.score,-10);
+}
+
+
+// show that the method left aligns a deletion within a repeat
+// (the query has five 'F's against six in the reference; the deletion is expected directly after "ABCDE")
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerLeftShift )
+{
+    static const std::string seq("ABCDEFFFFFGHIJKL");
+    static const std::string ref("ABCDEFFFFFFGHIJKL");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=1D11=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+}
+
+// show that the method left aligns an insertion within a repeat
+// (the query has seven 'F's against six in the reference; the insertion is expected directly after "ABCDE")
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerLeftShift2 )
+{
+    static const std::string seq("ABCDEFFFFFFFGHIJKL");
+    static const std::string ref("ABCDEFFFFFFGHIJKL");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=1I12=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+}
+
+
+// show that alignment is global over the query
+// (only the middle 'B' matches; the flanking bases are expected as mismatches rather than being clipped)
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerGlocal )
+{
+    static const std::string seq("AABCC");
+    static const std::string ref("ZZBYY");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"2X1=2X");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+}
+
+
+// show that soft-clipping weights and edge indel blocking are working correctly by setting
+// off-edge score to a prohibitively high value.
+// (off-edge score is -1000 and edge insertions keep their default, disabled; the path is expected to start and end with a mismatch)
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerNoClip )
+{
+    static const std::string seq("12ABCDEFFFFFFFGHIJKL12");
+    static const std::string ref(  "ABCDEFFFFFFFGHIJKL");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref,-1000);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"1X2I16=2I1X");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+}
+
+
+// show that soft-clipping weights and edge indel blocking are working correctly by setting
+// off-edge score to a prohibitively high value, and allowing edge Indels
+// (with edge insertions enabled, the two extra query bases at each end are expected as insertions)
+BOOST_AUTO_TEST_CASE( test_GlobalAlignerNoClipEdgeIndel )
+{
+    static const std::string seq("12ABCDEFFFFFFFGHIJKL12");
+    static const std::string ref(  "ABCDEFFFFFFFGHIJKL");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref,-1000, true);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"2I18=2I");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/alignment/test/GlobalJumpAlignerTest.cpp b/src/c++/lib/alignment/test/GlobalJumpAlignerTest.cpp
new file mode 100644
index 0000000..9a39d79
--- /dev/null
+++ b/src/c++/lib/alignment/test/GlobalJumpAlignerTest.cpp
@@ -0,0 +1,351 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "GlobalJumpAligner.hh"
+
+#include "blt_util/align_path.hh"
+
+#include <string>
+
+
+
+BOOST_AUTO_TEST_SUITE( test_GlobalJumpAligner )
+
+typedef short int score_t;
+
+static // test helper: jump-align seq against ref1 and ref2 with fixed scores and return the result
+JumpAlignmentResult<score_t>
+testAlign(
+    const std::string& seq,
+    const std::string& ref1,
+    const std::string& ref2)
+{
+    AlignmentScores<score_t> scores(2,-4,-5,-1,-1); // same (match, mismatch, open, extend, offEdge) order as testAlignScores in GlobalJumpIntronAlignerTest
+    score_t jumpScore(-3);
+    GlobalJumpAligner<score_t> aligner(scores,jumpScore);
+    JumpAlignmentResult<score_t> result;
+    aligner.align(
+        seq.begin(),seq.end(),
+        ref1.begin(),ref1.end(),
+        ref2.begin(),ref2.end(),
+        result);
+
+    return result;
+}
+
+
+static // variant of testAlign with a larger gap-open penalty (-10) and jump penalty (-20)
+JumpAlignmentResult<score_t>
+testAlign2(
+    const std::string& seq,
+    const std::string& ref1,
+    const std::string& ref2)
+{
+    static const AlignmentScores<score_t> scores(2,-4,-10,-1,-1);
+    static const int jumpScore(-20); // NOTE(review): int here, score_t in testAlign -- confirm the difference is intentional
+    GlobalJumpAligner<score_t> aligner(scores,jumpScore);
+    JumpAlignmentResult<score_t> result;
+    aligner.align(
+        seq.begin(),seq.end(),
+        ref1.begin(),ref1.end(),
+        ref2.begin(),ref2.end(),
+        result);
+
+    return result;
+}
+
+
+
+static // variant of testAlign with gap open -2, gap extend 0 and jump penalty -20
+JumpAlignmentResult<score_t>
+testAlign3(
+    const std::string& seq,
+    const std::string& ref1,
+    const std::string& ref2)
+{
+    static const AlignmentScores<score_t> scores(2,-4,-2,0,-1);
+    static const int jumpScore(-20); // NOTE(review): int here, score_t in testAlign -- confirm the difference is intentional
+    GlobalJumpAligner<score_t> aligner(scores,jumpScore);
+    JumpAlignmentResult<score_t> result;
+    aligner.align(
+        seq.begin(),seq.end(),
+        ref1.begin(),ref1.end(),
+        ref2.begin(),ref2.end(),
+        result);
+
+    return result;
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAligner0 )
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("ABABA");
+    static const std::string ref2("CDCDC");
+    // the query is ref1 followed directly by ref2: expect a full 5-base match on each reference
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,0);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAligner1 )
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("ABABAX");
+    static const std::string ref2("CDCDC");
+    // ref1 has one extra trailing base ('X') that the query does not cover; results are expected unchanged
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,0);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAligner2 )
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("ABABA");
+    static const std::string ref2("XCDCDC");
+    // ref2 has one extra leading base ('X'), so the second alignment is expected to start at offset 1
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,1);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerLong )
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("dslfjfkjaslABABAlsjfkdsflsk");
+    static const std::string ref2("sdfldsklkjdCDCDCfsdlkjfslk");
+    // the matching 5-mers are embedded in longer references; both alignments are expected at offset 11
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,11);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,11);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerSimpleIndels )
+{
+    static const std::string seq("ABABAABABACDCDCDyCDCDC");
+    static const std::string ref1("xABABABABABAx");
+    static const std::string ref2("xCDCDCDCDCDCDCx");
+    // expect a 1-base deletion on the ref1 side and a 1-base insertion (the 'y') on the ref2 side
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=1D5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,1);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"6=1I5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,1);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerBPInsert )
+{
+    static const std::string seq("ABABABABABA1234CDCDCDCDCDC");
+    static const std::string ref1("xABABABABABAx");
+    static const std::string ref2("xCDCDCDCDCDCDCx");
+    // "1234" occurs in neither reference: expected as a 4-base insertion at the jump (jumpInsertSize)
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"11=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,1);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"11=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,1);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize,4u);
+}
+
+
+// define behavior when the breakpoint solution is a 1d range
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerBPRange )
+{
+    static const std::string seq("xyzxyzxyzABCABCABCxyzxyzxyz");
+    static const std::string ref1("xyzxyzxyzxyzABCABCstustu");
+    static const std::string ref2("stustuABCABCxyzxyzxyzxyz");
+    // "ABC" occurs on both sides of the jump; expect 12 bases on ref1, 15 on ref2, no insertion and jumpRange 3
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"12=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,3);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"15=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,6);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize,0u);
+    BOOST_REQUIRE_EQUAL(result.jumpRange,3u);
+}
+
+
+// define behavior when the breakpoint solution is a 1d range
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerBPRange2 )
+{
+    static const std::string seq("xyzxyzxyzABCABCABCxyzxyzxyz");
+    static const std::string ref1("xyzxyzxyzxyzABCABCABCABCABC");
+    static const std::string ref2("ABCABCABCABCABCxyzxyzxyzxyz");
+    // both references contain the whole "ABC" run of the query; expect 9 bases on ref1, 18 on ref2 and jumpRange 9
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"9=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,3);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"18=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,6);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize,0u);
+    BOOST_REQUIRE_EQUAL(result.jumpRange,9u);
+}
+
+
+// define behavior when the breakpoint solution has an insertion with a repeat
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerInsert )
+{
+    static const std::string seq("xyzxyzxyzABCABCABCABCABCABCxyzxyzxyz");
+    static const std::string ref1("xyzxyzxyzxyzABCABCstustu");
+    static const std::string ref2("stustuABCABCxyzxyzxyzxyz");
+    // the query has six "ABC" repeats, each reference only two: expect 15 matched bases per side and a 6-base jump insertion
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"15=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,3);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"15=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,6);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize,6u);
+    BOOST_REQUIRE_EQUAL(result.jumpRange,0u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerOnly1 )
+{
+    static const std::string seq("ABABA");
+    static const std::string ref1("xABABAx");
+    static const std::string ref2("xCDCDCx");
+    // the query matches ref1 only: the second alignment is expected to be empty with beginPos 0
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,1);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,0);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerOnly2 )
+{
+    static const std::string seq("CDCDC");
+    static const std::string ref1("xABABAx");
+    static const std::string ref2("xCDCDCx");
+    // the query matches ref2 only: the first alignment is expected to be empty with beginPos 0
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,1);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerOffEdge )
+{
+    static const std::string seq("123456ABABACDCDC123456");
+    static const std::string ref1("xABABAx");
+    static const std::string ref2("xCDCDCx");
+    // the query overhangs both references: expect soft-clips at the outer ends and one mismatch against each flanking 'x'
+    JumpAlignmentResult<score_t> result = testAlign(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5S1X5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"5=1X5S");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,1);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerRef2Clip )
+{
+    // extracted from production failure case:
+    // the last 10 query bases equal ref2; the 6 leading bases are expected to be soft-clipped and ref1 left unused
+    static const std::string seq("GGCAGAAAAGGAAATA");
+    static const std::string ref1("TAAAAAGTAGAT");
+    static const std::string ref2("AAAGGAAATA");
+
+    JumpAlignmentResult<score_t> result = testAlign2(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"6S10=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize,0u);
+    BOOST_REQUIRE_EQUAL(result.jumpRange,0u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerRef1Clip )
+{
+    // extracted from production failure case:
+    // the first 12 query bases equal ref1; the 5 trailing bases are expected to be soft-clipped and ref2 left unused
+    static const std::string seq("TAAAAAGTAGATTTCGT");
+    static const std::string ref1("TAAAAAGTAGAT");
+    static const std::string ref2("AAAGGAAATA");
+
+    JumpAlignmentResult<score_t> result = testAlign2(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"12=5S");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize,0u);
+    BOOST_REQUIRE_EQUAL(result.jumpRange,0u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerDelJunction )
+{
+    static const std::string seq("AAAACCCCCCCCTTTTAAAATTTT");
+    static const std::string ref1("AAAAAAAACCCCCCCCG");
+    static const std::string ref2("GGGGTTTTAAAATTTT");
+    // each 12-base half of the query matches one reference at offset 4; expect a jump with no insertion and no range
+    JumpAlignmentResult<score_t> result = testAlign3(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"12=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,4);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"12=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,4);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize,0u);
+    BOOST_REQUIRE_EQUAL(result.jumpRange,0u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/alignment/test/GlobalJumpIntronAlignerTest.cpp b/src/c++/lib/alignment/test/GlobalJumpIntronAlignerTest.cpp
new file mode 100644
index 0000000..71d0b08
--- /dev/null
+++ b/src/c++/lib/alignment/test/GlobalJumpIntronAlignerTest.cpp
@@ -0,0 +1,319 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Felix Schlesinger
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "GlobalJumpIntronAligner.hh"
+
+#include "blt_util/align_path.hh"
+
+#include <string>
+
+#ifdef DEBUG
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+BOOST_AUTO_TEST_SUITE( test_GlobalJumpIntronAligner )
+
+typedef short int score_t;
+
+static
+JumpAlignmentResult<score_t>
+testAlignScores(
+    const std::string& seq,
+    const std::string& ref1,
+    const std::string& ref2,
+    int match, int mismatch, int open, int extend, int spliceOpen, int offEdge, int spliceOffEdge, int jump,
+    bool stranded, bool bp1Fw, bool bp2Fw)
+{
+    AlignmentScores<score_t> scores(match, mismatch, open, extend, offEdge);
+    score_t jumpScore(jump);
+    GlobalJumpIntronAligner<score_t> aligner(scores,jumpScore,spliceOpen,spliceOffEdge);
+    JumpAlignmentResult<score_t> result;
+    aligner.align(
+        seq.begin(),seq.end(),
+        ref1.begin(),ref1.end(),
+        ref2.begin(),ref2.end(),
+        bp1Fw, bp2Fw, stranded,
+        result);
+
+    return result;
+}
+
+static
+JumpAlignmentResult<score_t>
+testAlign(
+    const std::string& seq,
+    const std::string& ref1,
+    const std::string& ref2)
+{
+    // Default-score wrapper: match=2 mismatch=-4 open=-5 extend=-1 spliceOpen=-4 offEdge=-1 spliceOffEdge=-1 jump=-3, stranded, both breakends fw.
+    return testAlignScores(seq, ref1, ref2, 2, -4, -5, -1, -4, -1, -1, -3, true, true, true);
+}
+
+static
+JumpAlignmentResult<score_t>
+testAlignSplice(
+    const std::string& seq,
+    const std::string& ref1,
+    const std::string& ref2,
+    bool stranded=true,
+    bool fw=true)
+{
+    return testAlignScores(seq, ref1, ref2, 2,-4,-5,-1,-4,-1,-1, -3, stranded, fw, fw);
+}
+
+static
+JumpAlignmentResult<score_t>
+testAlignSpliceNoJump(
+    const std::string& seq,
+    const std::string& ref1,
+    const std::string& ref2,
+    bool stranded = true,
+    bool fw = true)
+{
+    return testAlignScores(seq, ref1, ref2, 2, -8, -19, -1, -15, -1, -1, -10000, stranded, fw, fw);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpIntronAligner0)
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("ABABA");
+    static const std::string ref2("CDCDC");
+
+    JumpAlignmentResult<score_t> result = testAlign(seq, ref1, ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos, 0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 0);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpIntronAligner1)
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("ABABAX");
+    static const std::string ref2("CDCDC");
+
+    JumpAlignmentResult<score_t> result = testAlign(seq, ref1, ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos, 0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 0);
+}
+
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpIntronAligner2)
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("ABABA");
+    static const std::string ref2("XCDCDC");
+
+    JumpAlignmentResult<score_t> result = testAlign(seq, ref1, ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos, 0);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 1);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpIntronAlignerLong)
+{
+    static const std::string seq("ABABACDCDC");
+    static const std::string ref1("dslfjfkjaslABABAlsjfkdsflsk");
+    static const std::string ref2("sdfldsklkjdCDCDCfsdlkjfslk");
+
+    JumpAlignmentResult<score_t> result = testAlign(seq, ref1, ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos, 11);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 11);
+}
+
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpIntronAlignerSimpleIndels)
+{
+    static const std::string seq("ABABAABABACDCDCDyCDCDC");
+    static const std::string ref1("xABABABABABAx");
+    static const std::string ref2("xCDCDCDCDCDCDCx");
+
+    JumpAlignmentResult<score_t> result = testAlign(seq, ref1, ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "5=1D5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos, 1);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "6=1I5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 1);
+}
+
+
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpIntronAlignerBPInsert)
+{
+    static const std::string seq("ABABABABABA1234CDCDCDCDCDC");
+    static const std::string ref1("xABABABABABAx");
+    static const std::string ref2("xCDCDCDCDCDCDCx");
+
+    JumpAlignmentResult<score_t> result = testAlign(seq, ref1, ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "11=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos, 1);
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "11=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 1);
+    BOOST_REQUIRE_EQUAL(result.jumpInsertSize, 4u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerSplice )
+{
+    static const std::string seq("AAAAABBBBB");
+    static const std::string ref1("xAAAAAGTxxxAGBBBBBx");
+    static const std::string ref2("xxxx");
+
+    JumpAlignmentResult<score_t> result = testAlignSplice(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=7N5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,1);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerSpliceRef2 )
+{
+    static const std::string seq("AAAAABBBBB");
+    static const std::string ref1("xxxx");
+    static const std::string ref2("xAAAAAGTxxxAGBBBBBx");
+
+    JumpAlignmentResult<score_t> result = testAlignSplice(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath),"5=7N5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos,1);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpAlignerSpliceRev)
+{
+    static const std::string seq("AAAAABBBBB");
+    static const std::string ref1("xxxx");
+    static const std::string ref2("xAAAAACTxxxACBBBBBx");
+
+    JumpAlignmentResult<score_t> result = testAlignSplice(seq, ref1, ref2, true, false);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "5=7N5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 1);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpAlignerSpliceUnstrandedRev)
+{
+    static const std::string seq("AAAAABBBBB");
+    static const std::string ref1("xxxx");
+    static const std::string ref2("xAAAAACTxxxACBBBBBx");
+
+    JumpAlignmentResult<score_t> result = testAlignSplice(seq, ref1, ref2, false);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "5=7N5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 1);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpAlignerNoSpliceWrongStrand)
+{
+    static const std::string seq("AAAAABBBBB");
+    static const std::string ref1("xxxx");
+    static const std::string ref2("xAAAAACTxxxACBBBBBx");
+
+    JumpAlignmentResult<score_t> result = testAlignSplice(seq, ref1, ref2, true, true);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "5=7D5=");
+    BOOST_REQUIRE_EQUAL(result.align2.beginPos, 1);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerSpliceNoSplice )
+{
+    static const std::string seq("AAAAABBBBB");
+    static const std::string ref1("xAAAAAGGxxxAGBBBBBx");
+    static const std::string ref2("xxxx");
+
+    JumpAlignmentResult<score_t> result = testAlignSplice(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=7D5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,1);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalJumpAlignerSpliceNoSplice2 )
+{
+    static const std::string seq("AAAAABBBBB");
+    static const std::string ref1("xAAAAAGTxxxGGBBBBBx");
+    static const std::string ref2("xxxx");
+
+    JumpAlignmentResult<score_t> result = testAlignSplice(seq,ref1,ref2);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath),"5=7D5=");
+    BOOST_REQUIRE_EQUAL(result.align1.beginPos,1);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpAlignerIntronOffEdgeRef2Left)
+{
+    static const std::string seq("GGACTGGGCCCAGA");
+    static const std::string ref1("ATTTAACTCAAGATGGATTAA");
+    static const std::string ref2("GGTTACCTGGGCCCAGATCCTGCTTGGTCACACCCAGCCCAGAAGAC");
+
+    JumpAlignmentResult<score_t> result = testAlignSpliceNoJump(seq, ref1, ref2, false);
+#ifdef DEBUG
+    log_os << result << "\n";
+#endif
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "");
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "3S6N11=");
+    BOOST_REQUIRE_EQUAL(result.score, 11 * 2 - 3);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpAlignerIntronOffEdgeRef2Right)
+{
+    static const std::string seq("TGGTCACACCCXXXX");
+    static const std::string ref1("ATTTAACTCAAGATGGATTAA");
+    static const std::string ref2("XXTGGTCACACCCGTAGCCCAAGAC");
+
+    JumpAlignmentResult<score_t> result = testAlignSpliceNoJump(seq, ref1, ref2, false);
+#ifdef DEBUG
+    log_os << result << "\n";
+#endif
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "");
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "11=12N4S");
+    BOOST_REQUIRE_EQUAL(result.score, 11*2-15-4);
+}
+
+BOOST_AUTO_TEST_CASE(test_GlobalJumpAlignerIntronOffEdgeRef1Both)
+{
+    static const std::string seq("TTTTACACACACACACACACGGGG");
+    static const std::string ref1("GGGGGGGACACACACACACACACACGTTTTTTT");
+    static const std::string ref2("ATTTAACTCAAGATGGATTAA");
+
+    JumpAlignmentResult<score_t> result = testAlignSpliceNoJump(seq, ref1, ref2, false);
+#ifdef DEBUG
+    log_os << result << "\n";
+#endif
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align1.apath), "4S9N16=8N4S");
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align2.apath), "");
+    BOOST_REQUIRE_EQUAL(result.score, -4+16*2-15-4);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/alignment/test/GlobalLargeIndelAlignerTest.cpp b/src/c++/lib/alignment/test/GlobalLargeIndelAlignerTest.cpp
new file mode 100644
index 0000000..d759651
--- /dev/null
+++ b/src/c++/lib/alignment/test/GlobalLargeIndelAlignerTest.cpp
@@ -0,0 +1,239 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "GlobalLargeIndelAligner.hh"
+
+#include "blt_util/align_path.hh"
+
+#include <string>
+
+
+
+BOOST_AUTO_TEST_SUITE( test_GlobalLargeIndelAligner )
+
+typedef short int score_t;
+
+static
+AlignmentResult<score_t>
+testAlign(
+    const std::string& seq,
+    const std::string& ref)
+{
+    AlignmentScores<score_t> scores(2,-4,-5,-1,-4);
+    GlobalLargeIndelAligner<score_t> aligner(scores,-10);
+    AlignmentResult<score_t> result;
+    aligner.align(seq.begin(),seq.end(),ref.begin(),ref.end(),result);
+
+    return result;
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAligner1 )
+{
+    static const std::string seq("D");
+    static const std::string ref("ABCDEF");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"1=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,3);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerDelete )
+{
+    static const std::string seq("BCDEFHIKLM");
+    static const std::string ref("ABCDEFGHIKLMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=1D5=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerInsert )
+{
+    static const std::string seq("BCDEFGXHIKLM");
+    static const std::string ref("ABCDEFGHIKLMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"6=1I5=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerInsertDelete )
+{
+    static const std::string seq("BBBBBBCDXYZHIKLMMMM");
+    static const std::string ref("ABBBBBBCDEFGHIKLMMMMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"8=3I3D8=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerInsertDelete2 )
+{
+    static const std::string seq("BBBBBBCDEXYHIKLMMMM");
+    static const std::string ref("ABBBBBBCDEFGHIKLMMMMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"9=2X8=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerShortRef1 )
+{
+    static const std::string seq("ABCD");
+    static const std::string ref("BCD");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"1S3=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.score,2);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerShortRef2 )
+{
+    static const std::string seq("ABCD");
+    static const std::string ref("ABC");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"3=1S");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.score,2);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerShortRef3 )
+{
+    static const std::string seq("ABCD");
+    static const std::string ref("B");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"1S1=2S");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+    BOOST_REQUIRE_EQUAL(result.score,-10);
+}
+
+
+// show that the method left aligns a deletion within a repeat
+//
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerLeftShift )
+{
+    static const std::string seq("ABCDEFFFFFGHIJKL");
+    static const std::string ref("ABCDEFFFFFFGHIJKL");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=1D11=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+}
+
+// show that the method left aligns an insertion within a repeat
+//
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerLeftShift2 )
+{
+    static const std::string seq("ABCDEFFFFFFFGHIJKL");
+    static const std::string ref("ABCDEFFFFFFGHIJKL");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=1I12=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,0);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerBigDelete )
+{
+    static const std::string seq("BCDEFHIKLM");
+    static const std::string ref("ABCDEFGGGGGGGGGGGGGGGGGGGGGGGGGGHIKLMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=26D5=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+    BOOST_REQUIRE_EQUAL(result.score,10);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerBigDeleteSmallInsert )
+{
+    static const std::string seq("BCDEFXHIKLM");
+    static const std::string ref("ABCDEFGGGGGGGGGGGGGGGGGGGGGGGGGGHIKLMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=1I26D5=");
+    BOOST_REQUIRE_EQUAL(result.align.beginPos,1);
+    BOOST_REQUIRE_EQUAL(result.score,9);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerBigInsert )
+{
+    static const std::string seq("BCDEFGXXXXXXXXXXXXXXXXXXXXXXXXHIKLM");
+    static const std::string ref("ABCDEFGHIKLMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"6=24I5=");
+    BOOST_REQUIRE_EQUAL(result.score,12);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerBigInsertSmallDelete )
+{
+    static const std::string seq("BCDEFGXXXXXXXXXXXXXXXXXXXXXXXXIKLM");
+    static const std::string ref("ABCDEFGHIKLMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"6=24I1D4=");
+    BOOST_REQUIRE_EQUAL(result.score,9);
+}
+
+BOOST_AUTO_TEST_CASE( test_GlobalLargeIndelAlignerBigInsertSmallDelete2 )
+{
+    static const std::string seq("BCDEFXXXXXXXXXXXXXXXXXXXXXXXXHIKLM");
+    static const std::string ref("ABCDEFGHIKLMN");
+
+    AlignmentResult<score_t> result = testAlign(seq,ref);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(result.align.apath),"5=24I1D5=");
+    BOOST_REQUIRE_EQUAL(result.score,9);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/alignment/test/test_main.cpp b/src/c++/lib/alignment/test/test_main.cpp
new file mode 100644
index 0000000..3a2f6be
--- /dev/null
+++ b/src/c++/lib/alignment/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libalignment
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/c++/lib/applications/CMakeLists.txt b/src/c++/lib/applications/CMakeLists.txt
new file mode 100644
index 0000000..1ebcbfc
--- /dev/null
+++ b/src/c++/lib/applications/CMakeLists.txt
@@ -0,0 +1,24 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+subdirlist(subdirs ${CMAKE_CURRENT_SOURCE_DIR})
+
+foreach(THIS_LIB_DIR ${subdirs})
+    add_subdirectory(${THIS_LIB_DIR})
+endforeach()
diff --git a/src/c++/lib/applications/CheckSVLoci/CMakeLists.txt b/src/c++/lib/applications/CheckSVLoci/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/CheckSVLoci/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/CheckSVLoci/CSLOptions.cpp b/src/c++/lib/applications/CheckSVLoci/CSLOptions.cpp
new file mode 100644
index 0000000..ba9e583
--- /dev/null
+++ b/src/c++/lib/applications/CheckSVLoci/CSLOptions.cpp
@@ -0,0 +1,95 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "CSLOptions.hh"
+#include "common/ProgramUtil.hh"
+
+#include "blt_util/log.hh"
+#include "boost/filesystem.hpp"
+#include "boost/program_options.hpp"
+
+
+
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    usage(os, prog, visible, "load binary sv locus graph and check validity, including after final cleaning steps", "", msg);
+}
+
+
+
+void
+parseCSLOptions(const illumina::Program& prog,
+                int argc, char* argv[],
+                CSLOptions& opt)
+{
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("graph-file", po::value(&opt.graphFilename),
+     "sv locus graph file")
+    ;
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm); // unix_style with the allow_short bit cleared: only long (--name) options are parsed
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // TODO: find out which more specific exception class program_options throws here
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        usage(log_os,prog,visible);
+    }
+
+    // fast check of config state: a graph file must be named and must exist on disk
+    if (opt.graphFilename.empty())
+    {
+        usage(log_os,prog,visible,"Must specify sv locus graph file");
+    }
+    if (! boost::filesystem::exists(opt.graphFilename))
+    {
+        usage(log_os,prog,visible,"SV locus graph file does not exist");
+    }
+}
+
diff --git a/src/c++/lib/applications/CheckSVLoci/CSLOptions.hh b/src/c++/lib/applications/CheckSVLoci/CSLOptions.hh
new file mode 100644
index 0000000..9f19d51
--- /dev/null
+++ b/src/c++/lib/applications/CheckSVLoci/CSLOptions.hh
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+#include <string>
+
+
+
+struct CSLOptions
+{
+
+    CSLOptions()
+    {}
+
+    std::string graphFilename;
+};
+
+
+void
+parseCSLOptions(const illumina::Program& prog,
+                int argc, char* argv[],
+                CSLOptions& opt);
diff --git a/src/c++/lib/applications/CheckSVLoci/CheckSVLoci.cpp b/src/c++/lib/applications/CheckSVLoci/CheckSVLoci.cpp
new file mode 100644
index 0000000..4fdefc0
--- /dev/null
+++ b/src/c++/lib/applications/CheckSVLoci/CheckSVLoci.cpp
@@ -0,0 +1,53 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "CheckSVLoci.hh"
+#include "CSLOptions.hh"
+
+#include "blt_util/log.hh"
+#include "svgraph/SVLocusSet.hh"
+
+
+
+static
+void
+runCSL(const CSLOptions& opt)
+{
+    SVLocusSet set;
+    set.load(opt.graphFilename.c_str());
+    set.finalize();
+    set.checkState(true,true);
+}
+
+
+
+void
+CheckSVLoci::
+runInternal(int argc, char* argv[]) const
+{
+    CSLOptions opt;
+
+    parseCSLOptions(*this,argc,argv,opt);
+    runCSL(opt);
+}
diff --git a/src/c++/lib/applications/CheckSVLoci/CheckSVLoci.hh b/src/c++/lib/applications/CheckSVLoci/CheckSVLoci.hh
new file mode 100644
index 0000000..615620a
--- /dev/null
+++ b/src/c++/lib/applications/CheckSVLoci/CheckSVLoci.hh
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// load a binary sv locus graph and check its validity, including after final cleaning steps
+///
+struct CheckSVLoci : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "CheckSVLoci";
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;
+};
diff --git a/src/c++/lib/applications/DumpSVLoci/CMakeLists.txt b/src/c++/lib/applications/DumpSVLoci/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/DumpSVLoci/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/DumpSVLoci/DSLOptions.cpp b/src/c++/lib/applications/DumpSVLoci/DSLOptions.cpp
new file mode 100644
index 0000000..34a62fd
--- /dev/null
+++ b/src/c++/lib/applications/DumpSVLoci/DSLOptions.cpp
@@ -0,0 +1,111 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "DSLOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/ProgramUtil.hh"
+
+#include "boost/filesystem.hpp"
+#include "boost/program_options.hpp"
+
+/// Print DumpSVLoci usage to \p os by forwarding to the six-argument usage() overload
+/// (presumably the one declared in common/ProgramUtil.hh -- confirm); \p msg is an optional error message
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    usage(os, prog, visible, "write binary sv locus graph to stdout", " > graph_dump", msg);
+}
+
+/// Parse the DumpSVLoci command line (\p argc / \p argv) into \p opt and validate the result.
+/// Failures are reported through usage(); NOTE(review): the checks below read as if usage() never returns -- confirm
+void
+parseDSLOptions(const illumina::Program& prog,
+                int argc, char* argv[],
+                DSLOptions& opt)
+{
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("graph-file", po::value(&opt.graphFilename),
+     "sv locus graph file")
+    ("region", po::value(&opt.region),
+     "list nodes in the specified region only. region in samtools format, eg. 'chr1:20-30' (optional)")
+    ("locus-index", po::value(&opt.locusIndex),
+     "dump only the specified locus")
+    ("locus-file", po::value(&opt.locusFilename),
+     "Write a binary sv locus file if locus-index is specified")
+    ;
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,   // style argument below: unix_style with the allow_short flag XOR-ed out
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);   // writes parsed values into the opt members bound above
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)   // no arguments, explicit --help, or a parse error
+    {
+        usage(log_os,prog,visible);
+    }
+
+    // fast check of config state:
+    if (opt.graphFilename.empty())
+    {
+        usage(log_os,prog,visible,"Must specify sv locus graph file");
+    }
+    if (! boost::filesystem::exists(opt.graphFilename))
+    {
+        usage(log_os,prog,visible,"SV locus graph file does not exist");
+    }
+    if (vm.count("locus-index"))   // presence is tested via vm because locusIndex==0 is also a valid value
+    {
+        if (! opt.region.empty()) usage(log_os,prog,visible,"locus-index and region cannot be used together");
+        opt.isLocusIndex=true;
+    }
+    if (! opt.locusFilename.empty())   // a locus output file only makes sense together with a locus index
+    {
+        if (! opt.isLocusIndex) usage(log_os,prog,visible,"Must specify sv locus index with locus file");
+    }
+}
+
diff --git a/src/c++/lib/applications/DumpSVLoci/DSLOptions.hh b/src/c++/lib/applications/DumpSVLoci/DSLOptions.hh
new file mode 100644
index 0000000..f6a9316
--- /dev/null
+++ b/src/c++/lib/applications/DumpSVLoci/DSLOptions.hh
@@ -0,0 +1,50 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+#include <string>
+
+/// Command-line options for the DumpSVLoci program; defaults come from the in-class initializers below
+struct DSLOptions
+{
+    /// set by parseDSLOptions() when --locus-index is present on the command line
+    bool isLocusIndex = false;
+    /// value of --locus-index: the single locus to dump
+    unsigned locusIndex = 0;
+    /// value of --graph-file: sv locus graph file to read
+    std::string graphFilename;
+    /// value of --locus-file: binary output file for the selected locus (empty when not given)
+    std::string locusFilename;
+    /// value of --region: samtools-format region restricting the dump (empty when not given)
+    std::string region;
+};
+
+/// Fill \p opt from the command line (\p argc / \p argv) of \p prog; defined in DSLOptions.cpp
+void
+parseDSLOptions(const illumina::Program& prog,
+                int argc, char* argv[],
+                DSLOptions& opt);
diff --git a/src/c++/lib/applications/DumpSVLoci/DumpSVLoci.cpp b/src/c++/lib/applications/DumpSVLoci/DumpSVLoci.cpp
new file mode 100644
index 0000000..051f8d2
--- /dev/null
+++ b/src/c++/lib/applications/DumpSVLoci/DumpSVLoci.cpp
@@ -0,0 +1,94 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "DumpSVLoci.hh"
+#include "DSLOptions.hh"
+
+#include "htsapi/bam_header_util.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include "blt_util/thirdparty_push.h"
+
+#include "boost/archive/binary_oarchive.hpp"
+
+#include "blt_util/thirdparty_pop.h"
+
+#include <fstream>
+#include <iostream>
+
+/// Load the graph named by opt.graphFilename and write the requested view to stdout: a region dump,
+/// a single locus (to stdout, or as a boost binary archive in opt.locusFilename), or the whole set
+static
+void
+runDSL(const DSLOptions& opt)
+{
+    SVLocusSet set;
+    set.load(opt.graphFilename.c_str());
+
+    const SVLocusSet& cset(set);   // const view of the same object, used for the read-only calls below
+
+    std::ostream& os(std::cout);
+
+    // add this handy map of chromosome id to chromosome label at the start of all output types:
+    os << cset.header << "\n";
+
+    if (! opt.region.empty())
+    {
+        int32_t tid,beginPos,endPos;   // passed to parse_bam_region() and then used to build the interval
+        parse_bam_region(set.header, opt.region.c_str(), tid, beginPos, endPos);
+
+        set.dumpRegion(os,GenomeInterval(tid,beginPos,endPos));
+    }
+    else if (opt.isLocusIndex)
+    {
+        const SVLocus& locus(cset.getLocus(opt.locusIndex));
+        if (opt.locusFilename.empty())
+        {
+            os << locus;
+        }
+        else
+        {
+            std::ofstream ofs(opt.locusFilename.c_str(), std::ios::binary);   // NOTE(review): open failure is not checked here
+            boost::archive::binary_oarchive oa(ofs);
+            oa << locus;
+        }
+    }
+    else
+    {
+        cset.dump(os);
+    }
+}
+
+
+/// Parse the command line into a DSLOptions, then run the dump
+void
+DumpSVLoci::
+runInternal(int argc, char* argv[]) const
+{
+
+    DSLOptions opt;
+
+    parseDSLOptions(*this,argc,argv,opt);
+    runDSL(opt);
+}
diff --git a/src/c++/lib/applications/DumpSVLoci/DumpSVLoci.hh b/src/c++/lib/applications/DumpSVLoci/DumpSVLoci.hh
new file mode 100644
index 0000000..0a59e3c
--- /dev/null
+++ b/src/c++/lib/applications/DumpSVLoci/DumpSVLoci.hh
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// dump the contents of an sv locus graph file to stdout
+///
+struct DumpSVLoci : public illumina::Program
+{
+    const char*
+    name() const   // returns the program's name string
+    {
+        return "DumpSVLoci";
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;   // defined in DumpSVLoci.cpp
+};
diff --git a/src/c++/lib/applications/EstimateSVLoci/CMakeLists.txt b/src/c++/lib/applications/EstimateSVLoci/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/EstimateSVLoci/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/EstimateSVLoci/ESLOptions.cpp b/src/c++/lib/applications/EstimateSVLoci/ESLOptions.cpp
new file mode 100644
index 0000000..2d0ea34
--- /dev/null
+++ b/src/c++/lib/applications/EstimateSVLoci/ESLOptions.cpp
@@ -0,0 +1,170 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "ESLOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/ProgramUtil.hh"
+#include "options/AlignmentFileOptionsParser.hh"
+#include "options/ReadScannerOptionsParser.hh"
+#include "options/SVLocusSetOptionsParser.hh"
+#include "options/optionsUtil.hh"
+
+#include "boost/program_options.hpp"
+
+#include <iostream>
+
+
+typedef std::vector<std::string> regions_t;
+
+/// Print EstimateSVLoci usage to \p os via the six-argument usage() overload; \p msg is an optional error message
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    usage(os, prog, visible, "partition sv evidence regions", "", msg);
+}
+
+/// Run checkStandardizeInputFile() on \p filename (taken by non-const reference, so the callee may rewrite it)
+/// and, when that call returns true, report the errorMsg it produced through usage()
+static
+void
+checkStandardizeUsageFile(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    std::string& filename,
+    const char* fileLabel)
+{
+    std::string errorMsg;
+    if ( checkStandardizeInputFile(filename, fileLabel, errorMsg))   // a true return is treated as failure
+    {
+        usage(os,prog,visible,errorMsg.c_str());
+    }
+}
+
+/// Parse the EstimateSVLoci command line (\p argc / \p argv) into \p opt and validate the result.
+/// Failures are reported through usage(); NOTE(review): the checks below read as if usage() never returns -- confirm
+void
+parseESLOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    ESLOptions& opt)
+{
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("output-file", po::value(&opt.outputFilename),
+     "write SV Locus graph to file (required)")
+    ("ref", po::value(&opt.referenceFilename),
+     "fasta reference sequence (required)")
+    ("align-stats", po::value(&opt.statsFilename),
+     "pre-computed alignment statistics for the input alignment files (required)")
+    ("chrom-depth", po::value(&opt.chromDepthFilename),
+     "average depth estimate for each chromosome")
+    ("truth-vcf", po::value(&opt.truthVcfFilename),
+     "optional truth VCF file (for testing)")
+    ("region", po::value<regions_t>(),
+     "samtools formatted region, eg. 'chr1:20-30'. May be supplied more than once but regions must not overlap. At least one entry required.")
+    ("rna", po::value(&opt.isRNA)->zero_tokens(),
+     "For RNA input. Changes small fragment handling.")
+    ;
+
+    po::options_description alignDesc(getOptionsDescription(opt.alignFileOpt));
+    po::options_description scanDesc(getOptionsDescription(opt.scanOpt));
+    po::options_description graphDesc(getOptionsDescription(opt.graphOpt));   // NOTE(review): no parseOptions() call for graphOpt appears below -- confirm none is needed
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(alignDesc).add(scanDesc).add(graphDesc).add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,   // style argument below: unix_style with the allow_short flag XOR-ed out
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);   // writes parsed values into the opt members bound above
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)   // no arguments, explicit --help, or a parse error
+    {
+        usage(log_os,prog,visible);
+    }
+
+    if (vm.count("region"))   // "region" is not bound to a member above, so it is copied out of vm by hand
+    {
+        opt.regions=(boost::any_cast<regions_t>(vm["region"].value()));
+    }
+
+    std::string errorMsg;
+    if (parseOptions(vm, opt.alignFileOpt, errorMsg))   // a true return is treated as failure, with errorMsg set
+    {
+        usage(log_os,prog,visible,errorMsg.c_str());
+    }
+    else if (parseOptions(vm, opt.scanOpt, errorMsg))
+    {
+        usage(log_os,prog,visible,errorMsg.c_str());
+    }
+    else if (opt.outputFilename.empty())
+    {
+        usage(log_os,prog,visible,"Must specify a graph output file");
+    }
+    else if (opt.referenceFilename.empty())
+    {
+        usage(log_os,prog,visible,"Must specify a fasta reference file");
+    }
+    else if (opt.regions.empty())
+    {
+        usage(log_os,prog,visible,"Need at least one samtools formatted region");
+    }
+
+    for (const auto& region : opt.regions)
+    {
+        if (region.empty())
+        {
+            usage(log_os,prog,visible,"Empty region argument");
+        }
+    }
+
+    checkStandardizeUsageFile(log_os,prog,visible,opt.statsFilename,"alignment statistics");
+    checkStandardizeUsageFile(log_os,prog,visible,opt.referenceFilename,"reference fasta");
+
+    if (! opt.chromDepthFilename.empty())   // the chromosome depth file is only checked when one was given
+    {
+        checkStandardizeUsageFile(log_os,prog,visible,opt.chromDepthFilename,"chromosome depth");
+    }
+}
diff --git a/src/c++/lib/applications/EstimateSVLoci/ESLOptions.hh b/src/c++/lib/applications/EstimateSVLoci/ESLOptions.hh
new file mode 100644
index 0000000..75e8f03
--- /dev/null
+++ b/src/c++/lib/applications/EstimateSVLoci/ESLOptions.hh
@@ -0,0 +1,61 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+#include "manta/SVLocusScanner.hh"
+#include "options/AlignmentFileOptions.hh"
+#include "options/ReadScannerOptions.hh"
+#include "options/SVLocusSetOptions.hh"
+
+#include <vector>
+
+/// Command-line options for the EstimateSVLoci program (filled in by parseESLOptions())
+struct ESLOptions
+{
+    ESLOptions() :
+        graphOpt(SVObservationWeights::observation)  // initialize noise edge filtration parameters
+    {}
+
+    AlignmentFileOptions alignFileOpt;   // supplies alignmentFilename and isAlignmentTumor to the scan code
+    ReadScannerOptions scanOpt;
+    SVLocusSetOptions graphOpt;
+
+    std::string referenceFilename;   // --ref
+    std::string outputFilename;   // --output-file
+    std::vector<std::string> regions;   // --region, may be given more than once
+    std::string statsFilename;   // --align-stats
+    std::string chromDepthFilename;   // --chrom-depth
+    std::string truthVcfFilename;   // --truth-vcf
+
+    /// TODO remove the need for this bool by having a single overlap pair handler
+    bool isRNA = false;   // --rna
+};
+
+
+void
+parseESLOptions(const illumina::Program& prog,
+                int argc, char* argv[],
+                ESLOptions& opt);
diff --git a/src/c++/lib/applications/EstimateSVLoci/EstimateSVLoci.cpp b/src/c++/lib/applications/EstimateSVLoci/EstimateSVLoci.cpp
new file mode 100644
index 0000000..d48d4fa
--- /dev/null
+++ b/src/c++/lib/applications/EstimateSVLoci/EstimateSVLoci.cpp
@@ -0,0 +1,198 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "EstimateSVLoci.hh"
+#include "ESLOptions.hh"
+#include "SVLocusSetFinder.hh"
+
+#include "blt_util/input_stream_handler.hh"
+#include "blt_util/log.hh"
+#include "common/OutStream.hh"
+#include "htsapi/bam_header_util.hh"
+#include "manta/SVReferenceUtil.hh"
+
+#include <iostream>
+#include <vector>
+
+//#define DEBUG_ESL
+
+/// Scan \p region of every input alignment file and build its sv locus graph. With a single configured region the
+/// graph is saved straight to opt.outputFilename; with several regions it is accumulated into \p mergedSet instead
+static
+void
+runESLRegion(
+    const ESLOptions& opt,
+    const std::string& region,
+    SVLocusSet& mergedSet)
+{
+    TimeTracker timer;
+    timer.resume();
+
+    typedef std::shared_ptr<bam_streamer> stream_ptr;
+    std::vector<stream_ptr> bamStreams;
+
+    // setup all data for main alignment loop:
+    for (const std::string& afile : opt.alignFileOpt.alignmentFilename)
+    {
+        stream_ptr tmp(new bam_streamer(afile.c_str(),
+                                        (region.empty()
+                                         ? nullptr
+                                         : region.c_str())));
+        bamStreams.push_back(tmp);
+    }
+
+    const unsigned bamCount(bamStreams.size());
+
+    assert(0 != bamCount);   // bamStreams[0] is dereferenced unconditionally below
+
+    // check bam header compatibility:
+    if (bamCount > 1)
+    {
+        /// TODO: provide a better error exception for failed bam header check:
+        const bam_hdr_t& compareHeader(bamStreams[0]->get_header());
+        for (unsigned bamIndex(1); bamIndex<bamCount; ++bamIndex)   // every other file is compared against file 0
+        {
+            const bam_hdr_t& indexHeader(bamStreams[bamIndex]->get_header());
+            if (! check_header_compatibility(compareHeader,indexHeader))
+            {
+                log_os << "ERROR: incompatible bam headers between files:\n"
+                       << "\t" << opt.alignFileOpt.alignmentFilename[0] << "\n"
+                       << "\t" << opt.alignFileOpt.alignmentFilename[bamIndex] << "\n";
+                exit(EXIT_FAILURE);
+            }
+        }
+    }
+
+    // assume headers compatible after this point....
+
+    const bam_hdr_t& header(bamStreams[0]->get_header());
+    const bam_header_info bamHeader(header);
+
+    int32_t tid(0), beginPos(0), endPos(0);
+    parse_bam_region(bamHeader,region.c_str(),tid,beginPos,endPos);
+
+    const GenomeInterval scanRegion(tid,beginPos,endPos);
+#ifdef DEBUG_ESL
+    static const std::string log_tag("EstimateSVLoci");
+    log_os << log_tag << " scanRegion= " << scanRegion << "\n";
+#endif
+
+    // grab the reference for segment we're estimating plus a buffer around the segment edges:
+    static const unsigned refEdgeBufferSize(500);
+
+    reference_contig_segment refSegment;
+    getIntervalReferenceSegment(opt.referenceFilename, bamHeader, refEdgeBufferSize, scanRegion, refSegment);
+
+    SVLocusSetFinder locusFinder(opt, scanRegion, bamHeader, refSegment);
+
+    input_stream_data sdata;
+    for (unsigned bamIndex(0); bamIndex<bamCount; ++bamIndex)
+    {
+        sdata.register_reads(*bamStreams[bamIndex],bamIndex);   // bamIndex comes back later as current.sample_no
+    }
+
+    // loop through alignments:
+    input_stream_handler sinput(sdata);
+    while (sinput.next())
+    {
+        const input_record_info current(sinput.get_current());
+
+        if (current.itype != INPUT_TYPE::READ)   // only read records are registered above
+        {
+            log_os << "ERROR: invalid input condition.\n";
+            exit(EXIT_FAILURE);
+        }
+
+        const bam_streamer& readStream(*bamStreams[current.sample_no]);
+        const bam_record& read(*(readStream.get_record_ptr()));
+
+        locusFinder.update(read, current.sample_no);
+    }
+
+    // finished updating:
+    locusFinder.flush();
+    timer.stop();
+    const CpuTimes totalTimes(timer.getTimes());
+#ifdef DEBUG_ESL
+    log_os << log_tag << " found " << locusFinder.getLocusSet().size() << " loci. \n";
+    log_os << log_tag << " totalTime: ";
+    totalTimes.reportHr(log_os);
+    log_os << "\n";
+#endif
+    locusFinder.setBuildTime(totalTimes);
+
+    const bool isMultiRegion(opt.regions.size()>1);
+
+    if (! isMultiRegion)
+    {
+        locusFinder.getLocusSet().save(opt.outputFilename.c_str());   // single region: write the result directly
+    }
+    else
+    {
+        if (mergedSet.empty())   // nothing accumulated yet: copy instead of merging
+        {
+            mergedSet = locusFinder.getLocusSet();
+        }
+        else
+        {
+            mergedSet.merge(locusFinder.getLocusSet());
+        }
+    }
+}
+
+/// Build the sv locus graph for every region in opt.regions; the result ends up in opt.outputFilename
+/// NOTE(review): a multi-region run in which mergedSet stays empty never reaches the save below -- confirm intended
+static
+void
+runESL(const ESLOptions& opt)
+{
+    {
+        // early test that we have permission to write to output file
+        OutStream outs(opt.outputFilename);
+    }
+
+    SVLocusSet mergedSet;
+
+    for (const auto& region : opt.regions)
+    {
+        runESLRegion(opt, region, mergedSet);
+    }
+
+    if (! mergedSet.empty())   // single-region runs save inside runESLRegion() and leave mergedSet empty
+    {
+        mergedSet.save(opt.outputFilename.c_str());
+    }
+}
+
+
+/// Parse the command line into an ESLOptions, then run the locus graph estimation
+void
+EstimateSVLoci::
+runInternal(int argc, char* argv[]) const
+{
+    ESLOptions opt;
+
+    parseESLOptions(*this,argc,argv,opt);
+    runESL(opt);
+}
diff --git a/src/c++/lib/applications/EstimateSVLoci/EstimateSVLoci.hh b/src/c++/lib/applications/EstimateSVLoci/EstimateSVLoci.hh
new file mode 100644
index 0000000..e42cb66
--- /dev/null
+++ b/src/c++/lib/applications/EstimateSVLoci/EstimateSVLoci.hh
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// build an sv locus graph from alignment file(s) over the requested region(s) and write it to a file
+///
+struct EstimateSVLoci : public illumina::Program
+{
+    const char*
+    name() const   // returns the program's name string
+    {
+        return "EstimateSVLoci";
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;   // defined in EstimateSVLoci.cpp
+};
diff --git a/src/c++/lib/applications/EstimateSVLoci/SVLocusSetFinder.cpp b/src/c++/lib/applications/EstimateSVLoci/SVLocusSetFinder.cpp
new file mode 100644
index 0000000..c608f5c
--- /dev/null
+++ b/src/c++/lib/applications/EstimateSVLoci/SVLocusSetFinder.cpp
@@ -0,0 +1,294 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SVLocusSetFinder.hh"
+
+#include "blt_util/log.hh"
+#include "htsapi/align_path_bam_util.hh"
+#include "manta/ChromDepthFilterUtil.hh"
+
+#include <iostream>
+
+/// Stage ids dispatched on in SVLocusSetFinder::process_pos(), plus the stage layout given to the stage manager
+///
+namespace STAGE
+{
+enum index_t
+{
+    HEAD,          // process_pos() does nothing for this stage
+    DENOISE,       // process_pos() cleans the locus graph for this stage
+    CLEAR_DEPTH    // process_pos() clears one depth buffer position for this stage
+};
+
+
+static
+stage_data
+getStageData(
+    const unsigned denoiseBorderSize)
+{
+    static const unsigned clearDepthBorderSize(10);
+
+    stage_data sd;
+    sd.add_stage(HEAD);
+    sd.add_stage(DENOISE, HEAD, denoiseBorderSize);   // NOTE(review): arguments look like (id, parent id, distance behind parent) -- confirm in stage_manager.hh
+    sd.add_stage(CLEAR_DEPTH, HEAD, clearDepthBorderSize);
+
+    return sd;
+}
+}
+
+
+
+static const unsigned depthBufferCompression = 16;   // constructor argument for the SVLocusSetFinder _depth buffer
+
+/// Set up a finder for \p scanRegion: configures the stage manager over the region, the optional max-depth
+/// filter, the per-sample counts and the graph header, then derives the denoise region
+SVLocusSetFinder::
+SVLocusSetFinder(
+    const ESLOptions& opt,
+    const GenomeInterval& scanRegion,
+    const bam_header_info& bamHeader,
+    const reference_contig_segment& refSeq) :
+    _isAlignmentTumor(opt.alignFileOpt.isAlignmentTumor),
+    _scanRegion(scanRegion),
+    _stageman(
+        STAGE::getStageData(REGION_DENOISE_BORDER),
+        pos_range(
+            scanRegion.range.begin_pos(),
+            scanRegion.range.end_pos()),
+        *this),   // presumably the pos_processor_base whose process_pos() the stage manager invokes -- confirm
+    _svLoci(opt.graphOpt),
+    _depth(depthBufferCompression),
+    _isScanStarted(false),
+    _isInDenoiseRegion(false),
+    _denoisePos(0),
+    _readScanner(opt.scanOpt,opt.statsFilename,opt.alignFileOpt.alignmentFilename, opt.isRNA),
+    _isMaxDepth(false),
+    _maxDepth(0),
+    _bamHeader(bamHeader),
+    _refSeq(refSeq)
+{
+    const ChromDepthFilterUtil dFilter(opt.chromDepthFilename, opt.scanOpt.maxDepthFactor, bamHeader);
+    _isMaxDepth=dFilter.isMaxDepthFilter();
+    if (_isMaxDepth)   // _maxDepth keeps its 0 initializer when the filter is off
+    {
+        _maxDepth=dFilter.maxDepth(scanRegion.tid);
+    }
+
+    const unsigned sampleCount(opt.alignFileOpt.alignmentFilename.size());
+    _svLoci.getCounts().setSampleCount(sampleCount);
+
+    for (unsigned sampleIndex(0); sampleIndex<sampleCount; ++sampleIndex)   // record each sample's alignment file as its source
+    {
+        _svLoci.getCounts().getSampleCounts(sampleIndex).sampleSource = opt.alignFileOpt.alignmentFilename[sampleIndex];
+    }
+
+    _svLoci.header = _bamHeader;
+    updateDenoiseRegion();   // reads _svLoci.header, so it must follow the assignment above
+}
+
+/// Derive _denoiseRegion from _scanRegion by moving each end inward by REGION_DENOISE_BORDER, except for
+/// a begin position of 0 and for an end position that is not below the chromosome length in the header
+void
+SVLocusSetFinder::
+updateDenoiseRegion()
+{
+    _denoiseRegion=_scanRegion;
+
+    known_pos_range2& range(_denoiseRegion.range);
+    if (range.begin_pos() > 0)
+    {
+        range.set_begin_pos(range.begin_pos()+REGION_DENOISE_BORDER);
+    }
+
+    bool isEndBorder(true);   // stays true when tid has no entry in the header's chrom_data
+    if (static_cast<int32_t>(_svLoci.header.chrom_data.size()) > _denoiseRegion.tid)
+    {
+        const pos_t chromEndPos(_svLoci.header.chrom_data[_denoiseRegion.tid].length);
+        isEndBorder=(range.end_pos() < chromEndPos);
+    }
+
+    if (isEndBorder)
+    {
+        range.set_end_pos(range.end_pos()-REGION_DENOISE_BORDER);
+    }
+
+#ifdef DEBUG_SFINDER
+    log_os << __FUNCTION__ << ": " << _denoiseRegion << "\n";
+#endif
+
+}
+
+/// Handle position \p pos for stage \p stage_no: HEAD does nothing, DENOISE cleans the locus graph in chunks
+/// of at least denoiseMinChunk positions while inside _denoiseRegion, CLEAR_DEPTH clears that depth buffer position
+void
+SVLocusSetFinder::
+process_pos(const int stage_no,
+            const pos_t pos)
+{
+#ifdef DEBUG_SFINDER
+    log_os << __FUNCTION__ << ": stage_no: " << stage_no << " pos: " << pos << "\n";
+#endif
+
+    if     (stage_no == STAGE::HEAD)
+    {
+        // pass
+    }
+    else if (stage_no == STAGE::DENOISE)
+    {
+        static const pos_t denoiseMinChunk(1000);
+
+        if (_denoiseRegion.range.is_pos_intersect(pos))
+        {
+
+#ifdef DEBUG_SFINDER
+            log_os << __FUNCTION__ << ": pos intersect. pos: " << pos << " dnRegion: " << _denoiseRegion << " is in region: " << _isInDenoiseRegion << "\n";
+#endif
+
+            if (! _isInDenoiseRegion)   // just entered the region: start cleaning from its first position
+            {
+                _denoisePos=_denoiseRegion.range.begin_pos();
+                _isInDenoiseRegion=true;
+            }
+
+            if ( (1 + pos-_denoisePos) >= denoiseMinChunk)   // enough uncleaned positions have accumulated
+            {
+                _svLoci.cleanRegion(GenomeInterval(_denoiseRegion.tid, _denoisePos, (pos+1)));
+                _denoisePos = (pos+1);
+            }
+        }
+        else
+        {
+
+#ifdef DEBUG_SFINDER
+            log_os << __FUNCTION__ << ": no pos intersect. pos: " << pos << " dnRegion: " << _denoiseRegion << " is in region: " << _isInDenoiseRegion << "\n";
+#endif
+
+            if (_isInDenoiseRegion)   // just left the region: clean whatever remains up to its end
+            {
+                if ( (_denoiseRegion.range.end_pos()-_denoisePos) > 0)
+                {
+                    _svLoci.cleanRegion(GenomeInterval(_denoiseRegion.tid, _denoisePos, _denoiseRegion.range.end_pos()));
+                    _denoisePos = _denoiseRegion.range.end_pos();
+                }
+                _isInDenoiseRegion=false;
+            }
+        }
+    }
+    else if (stage_no == STAGE::CLEAR_DEPTH)
+    {
+        _depth.clear_pos(pos);
+    }
+    else
+    {
+        assert(false && "Unexpected stage id");
+    }
+}
+
+/// Add \p bamRead to the _depth buffer, covering read_size() positions from its start position. Does nothing
+/// when the max-depth filter is off, when the read's sample is flagged tumor, or when the read is unmapped
+void
+SVLocusSetFinder::
+addToDepthBuffer(
+    const unsigned defaultReadGroupIndex,
+    const bam_record& bamRead)
+{
+    if (! _isMaxDepth) return;
+
+    // estimate depth from normal sample only:
+    if (_isAlignmentTumor[defaultReadGroupIndex]) return;
+
+    // depth estimation relies on a simple filtration criteria to stay in sync with the chromosome mean
+    // depth estimates:
+    if (bamRead.is_unmapped()) return;
+
+    const pos_t refPos(bamRead.pos()-1);   // presumably pos() is 1-based and refPos 0-based -- confirm
+
+    /// stick to a simple approximation -- ignore CIGAR string and just look at the read length:
+    const pos_t readSize(bamRead.read_size());
+    _depth.inc(refPos,readSize);
+}
+
+/// Process one read from sample \p defaultReadGroupIndex: update the depth buffer, apply the core, max-depth,
+/// mapping-quality and evidence filters, then merge the loci of a surviving read that starts in the scan region
+void
+SVLocusSetFinder::
+update(
+    const bam_record& bamRead,
+    const unsigned defaultReadGroupIndex)
+{
+    _isScanStarted=true;
+
+    const bool isTumor(_isAlignmentTumor[defaultReadGroupIndex]);
+    if (! isTumor)
+    {
+        // depth estimation relies on a simple filtration criteria to stay in sync with the chromosome mean
+        // depth estimates using samtools idxstats:
+        if (! bamRead.is_unmapped())
+        {
+            addToDepthBuffer(defaultReadGroupIndex, bamRead);
+        }
+    }
+
+    // note we currently filter unmapped but allow unpaired/unmapped mate to come through this screen
+    // these reads can still show an assembly signal as individual reads, or by contributing shadows
+    if (SVLocusScanner::isMappedReadFilteredCore(bamRead)) return;
+
+    if (_isMaxDepth)
+    {
+        if (_depth.val(bamRead.pos()-1) > _maxDepth) return;   // skip reads starting where depth exceeds the limit
+    }
+
+    SampleCounts& counts(_svLoci.getCounts().getSampleCounts(defaultReadGroupIndex));
+    SampleReadInputCounts& incounts(counts.input);
+    if (bamRead.map_qual() < _readScanner.getMinMapQ())
+    {
+        incounts.minMapq++;   // count the read as rejected for low mapping quality
+        return;
+    }
+
+    if (! _readScanner.isSVEvidence(bamRead,defaultReadGroupIndex,_refSeq,&(incounts.evidenceCount))) return;
+
+#ifdef DEBUG_SFINDER
+    log_os << __FUNCTION__ << ": Accepted read. read: " << bamRead << "\n";
+#endif
+
+    // check that this read starts in our scan region:
+    if (! _scanRegion.range.is_pos_intersect(bamRead.pos()-1)) return;
+
+    _stageman.handle_new_pos_value(bamRead.pos()-1);
+
+    std::vector<SVLocus> loci;
+
+    SampleEvidenceCounts& eCounts(counts.evidence);
+
+    _readScanner.getSVLoci(bamRead, defaultReadGroupIndex, _bamHeader,
+                           _refSeq, loci, eCounts);
+
+    for (const SVLocus& locus : loci)
+    {
+        if (locus.empty()) continue;
+        _svLoci.merge(locus);
+    }
+}
diff --git a/src/c++/lib/applications/EstimateSVLoci/SVLocusSetFinder.hh b/src/c++/lib/applications/EstimateSVLoci/SVLocusSetFinder.hh
new file mode 100644
index 0000000..996285a
--- /dev/null
+++ b/src/c++/lib/applications/EstimateSVLoci/SVLocusSetFinder.hh
@@ -0,0 +1,130 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "ESLOptions.hh"
+
+#include "blt_util/depth_buffer.hh"
+#include "blt_util/pos_processor_base.hh"
+#include "blt_util/stage_manager.hh"
+#include "htsapi/bam_record.hh"
+#include "manta/SVLocusScanner.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+
+//#define DEBUG_SFINDER
+
+
+
+/// estimate an SVLocusSet from the reads of a single scan region
+/// (reads are handed in one at a time through update())
+struct SVLocusSetFinder : public pos_processor_base
+{
+    SVLocusSetFinder(
+        const ESLOptions& opt,
+        const GenomeInterval& scanRegion,
+        const bam_header_info& bamHeader,
+        const reference_contig_segment& refSeq);
+
+    ~SVLocusSetFinder()
+    {
+        flush();
+    }
+
+    /// \param defaultReadGroupIndex read group index to use in the absence of an RG tag
+    /// (for now RGs are ignored for the purpose of gathering insert stats)
+    ///
+    void
+    update(
+        const bam_record& bamRead,
+        const unsigned defaultReadGroupIndex);
+
+    const SVLocusSet&
+    getLocusSet()
+    {
+        return _svLoci;
+    }
+
+    // flush any cached values built up during the update process
+    void
+    flush()
+    {
+        _stageman.reset();
+    }
+
+    void
+    setBuildTime(const CpuTimes& t)
+    {
+        _svLoci.setBuildTime(t);
+    }
+
+private:
+
+    void
+    process_pos(const int stage_no,
+                const pos_t pos);
+
+    void
+    updateDenoiseRegion();
+
+    void
+    addToDepthBuffer(
+        const unsigned defaultReadGroupIndex,
+        const bam_record& bamRead);
+
+    // TODO -- compute this number from read insert ranges:
+    enum hack_t
+    {
+        REGION_DENOISE_BORDER = 5000    ///< length in bases on the beginning and the end of scan range which is excluded from in-line graph de-noising
+    };
+
+    /////////////////////////////////////////////////
+    // data:
+    const std::vector<bool> _isAlignmentTumor;
+    const GenomeInterval _scanRegion; ///< update() only builds loci from reads whose start position intersects this region
+    GenomeInterval _denoiseRegion;
+    stage_manager _stageman;
+    SVLocusSet _svLoci; ///< the locus set being estimated, exposed through getLocusSet()
+
+    depth_buffer_compressible _depth; ///< track depth for the purpose of filtering high-depth regions
+
+    bool _isScanStarted;
+
+    bool _isInDenoiseRegion;
+    pos_t _denoisePos;
+
+    SVLocusScanner _readScanner;
+
+    bool _isMaxDepth; ///< if true, update() skips reads starting where _depth exceeds _maxDepth
+    float _maxDepth;
+
+    const bam_header_info& _bamHeader;
+    const reference_contig_segment& _refSeq;
+};
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/CMakeLists.txt b/src/c++/lib/applications/GenerateSVCandidates/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeOptions.hh b/src/c++/lib/applications/GenerateSVCandidates/EdgeOptions.hh
new file mode 100644
index 0000000..65d4568
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeOptions.hh
@@ -0,0 +1,49 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+
+/// options restricting candidate generation to a single locus, node or edge of the SV locus graph
+struct LocusEdgeOptions
+{
+    unsigned locusIndex = 0; ///< if isLocusIndex, report this locus only
+    bool isNodeIndex1 = false; ///< if true, generate candidates for all edges touching a specific node in one locus. Assumes isLocusIndex is true
+    unsigned nodeIndex1 = 0; ///< node selected when isNodeIndex1 is true
+    bool isNodeIndex2 = false; ///< if true, generate candidates for only the edge from node1 to node2 in one locus. Assumes isLocusIndex & isNodeIndex1 are true
+    unsigned nodeIndex2 = 0; ///< second node of the selected edge when isNodeIndex2 is true
+};
+
+
+/// options for SVLocusGraph edge iteration and noise edge filtration
+struct EdgeOptions
+{
+    unsigned binCount = 1; ///< divide all edges in the graph into binCount bins of approx equal complexity, must be 1 or greater
+    unsigned binIndex = 0; ///< out of binCount bins, iterate through the edges in this bin only, must be in [0,binCount)
+
+    bool isLocusIndex = false; ///< if true, generate candidates for a specific SVgraph locus only, and ignore binCount/binIndex
+    LocusEdgeOptions locusOpt; ///< locus/node/edge selection filled in from the locus-index argument, used when isLocusIndex is true
+
+    unsigned graphNodeMaxEdgeCount = 10; ///< if both nodes of an edge have an edge count higher than this, then skip evaluation of this edge, set to 0 to turn this filtration off
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeOptionsParser.cpp b/src/c++/lib/applications/GenerateSVCandidates/EdgeOptionsParser.cpp
new file mode 100644
index 0000000..5f08110
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeOptionsParser.cpp
@@ -0,0 +1,114 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "EdgeOptionsParser.hh"
+
+#include "blt_util/parse_util.hh"
+#include "blt_util/string_util.hh"
+
+
+namespace
+{
+const char locusIndexKey[] = "locus-index"; // name of the command-line option holding the locusIndex[:nodeIndex[:nodeIndex]] string
+}
+
+/// build the "edge-selection" command-line option group: bin-count and bin-index are stored
+/// directly into \p opt, locus-index is kept as a string and decoded later by parseOptions()
+boost::program_options::options_description
+getOptionsDescription(
+    EdgeOptions& opt)
+{
+    namespace po = boost::program_options;
+
+    po::options_description optdesc("edge-selection");
+    optdesc.add_options()
+    ("bin-count", po::value(&opt.binCount)->default_value(opt.binCount),
+     "Specify how many bins the SV candidate problem should be divided into, where bin-index can be used to specify which bin to solve")
+    ("bin-index", po::value(&opt.binIndex)->default_value(opt.binIndex),
+     "specify which bin to solve when the SV candidate problem is subdivided into bins. Value must be in [0,bin-count)")
+    (locusIndexKey, po::value<std::string>(),
+     "Instead of solving for all SV candidates in a bin, solve for candidates of a particular locus or edge."
+     " If this argument is specified then bin-index is ignored."
+     " Argument can be one of { locusIndex , locusIndex:nodeIndex , locusIndex:nodeIndex:nodeIndex },"
+     " which will run an entire locus, all edges connected to one node in a locus or a single edge, respectively.")
+    ;
+    return optdesc;
+}
+
+/// validate the edge options and decode the optional locus-index string into opt.locusOpt
+/// \return true if an error was found (errorMsg is then non-empty), false if the options are valid
+bool
+parseOptions(
+    const boost::program_options::variables_map& vm,
+    EdgeOptions& opt,
+    std::string& errorMsg)
+{
+    errorMsg.clear();
+
+    if (vm.count(locusIndexKey))
+    {
+        using namespace illumina::blt_util;
+
+        const std::string& locusString(boost::any_cast<std::string>(vm[locusIndexKey].value()));
+
+        std::vector<std::string> indices;
+        split_string(locusString, ':', indices);
+        if (indices.size() > 3)
+        {
+            errorMsg="locus-index argument can have no more than 3 colon separated segments"; // NOTE(review): parsing still continues below and fills opt from the first three segments
+        }
+
+        assert(! indices.empty());
+
+        opt.isLocusIndex = true;
+        {
+            LocusEdgeOptions& lopt(opt.locusOpt);
+            lopt.locusIndex = parse_unsigned_str(indices[0]);
+            if (indices.size() > 1)
+            {
+                lopt.isNodeIndex1 = true;
+                lopt.nodeIndex1 = parse_unsigned_str(indices[1]);
+                if (indices.size() > 2)
+                {
+                    lopt.isNodeIndex2 = true;
+                    lopt.nodeIndex2 = parse_unsigned_str(indices[2]);
+                }
+            }
+        }
+    }
+
+    if (errorMsg.empty()) // bin checks are skipped once an earlier error has been recorded
+    {
+        if (opt.binCount < 1)
+        {
+            errorMsg="bin-count must be 1 or greater";
+        }
+        else if (opt.binIndex >= opt.binCount)
+        {
+            errorMsg="bin-index must be in range [0,bin-count)";
+        }
+    }
+
+    return (! errorMsg.empty()); // true signals failure
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeOptionsParser.hh b/src/c++/lib/applications/GenerateSVCandidates/EdgeOptionsParser.hh
new file mode 100644
index 0000000..ef724e1
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeOptionsParser.hh
@@ -0,0 +1,47 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeOptions.hh"
+
+#include "boost/program_options.hpp"
+
+#include <string>
+
+/// \return the "edge-selection" option group; bin-count and bin-index are bound to fields of \p opt
+boost::program_options::options_description
+getOptionsDescription(
+    EdgeOptions& opt);
+
+
+/// additional parsing beyond default boost::program_options behaviors,
+/// and arg validation
+///
+/// \return true if an error was detected (errorMsg then describes it), false if the options are valid
+bool
+parseOptions(
+    const boost::program_options::variables_map& vm,
+    EdgeOptions& opt,
+    std::string& errorMsg);
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRetriever.hh b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetriever.hh
new file mode 100644
index 0000000..45ab10e
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetriever.hh
@@ -0,0 +1,60 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/EdgeInfo.hh"
+#include "svgraph/SVLocusSet.hh"
+
+
+/// provide an iterator over edges in a set of SV locus graphs
+/// (abstract base: subclasses implement next(), the selected edge is read back through getEdge())
+struct EdgeRetriever
+{
+    EdgeRetriever(
+        const SVLocusSet& set,
+        const unsigned graphNodeMaxEdgeCount) :
+        _set(set),
+        _graphNodeMaxEdgeCount(graphNodeMaxEdgeCount)
+    {}
+
+    virtual
+    ~EdgeRetriever()
+    {}
+
+    virtual
+    bool
+    next() = 0; // advance to the next edge; the subclasses in this directory return false once iteration is finished
+
+    const EdgeInfo&
+    getEdge() const
+    {
+        return _edge;
+    }
+
+protected:
+    const SVLocusSet& _set; ///< graph set being iterated, held by reference
+    const unsigned _graphNodeMaxEdgeCount; ///< subclasses skip an edge when both of its nodes have more edges than this, 0 disables the filter
+    EdgeInfo _edge; ///< current position of the iteration, updated by next() in the subclasses
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverBin.cpp b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverBin.cpp
new file mode 100644
index 0000000..d8a00cb
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverBin.cpp
@@ -0,0 +1,246 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "EdgeRetrieverBin.hh"
+
+#include <cassert>
+
+
+//#define DEBUG_EDGER
+
+#ifdef DEBUG_EDGER
+#include <iostream>
+#include "blt_util/log.hh"
+#endif
+
+/// \return the cumulative observation count at which bin \p binIndex begins,
+///         i.e. floor(totalCount*binIndex/binCount); arguments are doubles so the product cannot overflow
+static
+unsigned long
+getBoundaryCount(
+    const double binCount,
+    const double binIndex,
+    const double totalCount)
+{
+    return static_cast<unsigned long>(std::floor((totalCount*binIndex)/binCount)); // cast matches the return type: a plain 'unsigned' cast is out of range for counts above UINT_MAX
+}
+
+/// set up the observation-count bounds (_beginCount, _endCount) of bin binIndex out of binCount,
+/// both derived from the total observation count of the whole locus set
+EdgeRetrieverBin::
+EdgeRetrieverBin(
+    const SVLocusSet& set,
+    const unsigned graphNodeMaxEdgeCount,
+    const unsigned binCount,
+    const unsigned binIndex) :
+    EdgeRetriever(set,graphNodeMaxEdgeCount),
+    _headCount(0)
+{
+    assert(binCount > 0);
+    assert(binIndex < binCount);
+
+    const unsigned long totalObservationCount(_set.totalObservationCount());
+    _beginCount=(getBoundaryCount(binCount,binIndex,totalObservationCount));
+    _endCount=(getBoundaryCount(binCount,binIndex+1,totalObservationCount)); // start of the following bin
+
+#ifdef DEBUG_EDGER
+    log_os << "EDGER: bi,bc,begin,end: "
+           << binIndex << " "
+           << binCount << " "
+           << _beginCount << " "
+           << _endCount << "\n";
+#endif
+}
+
+/// move _edge/_headCount forward to the first unfiltered edge whose cumulative observation
+/// count exceeds _beginCount; loci lying entirely before _beginCount are skipped as a whole
+void
+EdgeRetrieverBin::
+jumpToFirstEdge()
+{
+    typedef SVLocusEdgesType::const_iterator edgeiter_t;
+
+    const bool isFilterNodes(_graphNodeMaxEdgeCount>0);
+    const unsigned setSize(_set.size());
+
+    // first catch headCount up to the begin edge if required:
+    while (true)
+    {
+        assert(_edge.locusIndex < setSize);
+
+        const SVLocus& locus(_set.getLocus(_edge.locusIndex));
+        const unsigned locusObservationCount(locus.totalObservationCount());
+
+        if ((_headCount+locusObservationCount) > _beginCount) // the begin edge is expected inside this locus: walk it edge by edge
+        {
+            const unsigned locusSize(locus.size());
+            while (_edge.nodeIndex1 < locusSize)
+            {
+                const SVLocusNode& node1(locus.getNode(_edge.nodeIndex1));
+                const bool isEdgeFilterNode1(isFilterNodes && (node1.size()>_graphNodeMaxEdgeCount));
+
+                const SVLocusEdgeManager node1Manager(node1.getEdgeManager());
+                edgeiter_t edgeIter(node1Manager.getMap().lower_bound(_edge.nodeIndex1)); // only visit edges to nodes with index >= nodeIndex1
+                const edgeiter_t edgeiterEnd(node1Manager.getMap().cend());
+
+                for (; edgeIter != edgeiterEnd; ++edgeIter)
+                {
+                    unsigned edgeCount(edgeIter->second.getCount());
+                    const bool isSelfEdge(edgeIter->first == _edge.nodeIndex1);
+                    if (! isSelfEdge) edgeCount += locus.getEdge(edgeIter->first,_edge.nodeIndex1).getCount(); // add the count stored on the edge in the opposite direction
+                    _headCount += edgeCount;
+                    if (_headCount > _beginCount)
+                    {
+                        _edge.nodeIndex2 = edgeIter->first;
+
+                        // if both nodes have high edge counts we filter out the edge:
+                        if (isEdgeFilterNode1)
+                        {
+                            const SVLocusNode& node2(locus.getNode(_edge.nodeIndex2));
+                            const bool isEdgeFilterNode2(node2.size()>_graphNodeMaxEdgeCount);
+                            if (isEdgeFilterNode2)
+                            {
+#ifdef DEBUG_EDGER
+                                log_os << "EDGER: jump filtering @ hc: " << _headCount << "\n";
+#endif
+                                continue; // NOTE(review): no end-of-set handling on this path, unlike isLastFiltered in advanceEdge() -- confirm
+                            }
+                        }
+                        return;
+                    }
+                }
+
+                _edge.nodeIndex1++;
+            }
+            assert(_headCount >= _beginCount);
+        }
+        _headCount += locusObservationCount; // NOTE(review): also executed after the branch above has walked this locus edge by edge (reachable when edges are filtered) -- confirm the second add is intended
+        _edge.locusIndex++;
+    }
+
+    assert(false && "jumpToFirstEdge: invalid state"); // not reached: the loop above only exits through return
+}
+
+/// step _edge to the next unfiltered edge; the observation count of every edge passed on the
+/// way (filtered edges included) is added to _headCount
+void
+EdgeRetrieverBin::
+advanceEdge()
+{
+    typedef SVLocusEdgesType::const_iterator edgeiter_t;
+
+    const bool isFilterNodes(_graphNodeMaxEdgeCount>0);
+    const unsigned setSize(_set.size());
+
+    if (0 != _headCount) _edge.nodeIndex2++; // resume after the current edge unless nothing has been counted yet
+
+    bool isLastFiltered(false);
+
+    while (true)
+    {
+        if (isLastFiltered && (_edge.locusIndex == setSize))
+        {
+            _headCount = (_endCount + 1); // ran off the end of the set while skipping filtered edges: makes next() return false
+            return;
+        }
+        assert(_edge.locusIndex < setSize);
+
+        const SVLocus& locus(_set.getLocus(_edge.locusIndex));
+        const unsigned locusSize(locus.size());
+
+        while (_edge.nodeIndex1 < locusSize)
+        {
+            const SVLocusNode& node1(locus.getNode(_edge.nodeIndex1));
+            const bool isEdgeFilterNode1(isFilterNodes && (node1.size()>_graphNodeMaxEdgeCount));
+            const SVLocusEdgeManager node1Manager(node1.getEdgeManager());
+            edgeiter_t edgeIter(node1Manager.getMap().lower_bound(_edge.nodeIndex2));
+            const edgeiter_t edgeIterEnd(node1Manager.getMap().cend());
+
+            for (; edgeIter != edgeIterEnd; ++edgeIter)
+            {
+                unsigned edgeCount(edgeIter->second.getCount());
+                const bool isSelfEdge(edgeIter->first == _edge.nodeIndex1);
+                if (! isSelfEdge) edgeCount += locus.getEdge(edgeIter->first,_edge.nodeIndex1).getCount(); // add the count stored on the edge in the opposite direction
+                _headCount += edgeCount;
+                _edge.nodeIndex2 = edgeIter->first;
+
+                // if both nodes have high edge counts we filter out the edge:
+                if (isEdgeFilterNode1)
+                {
+                    const SVLocusNode& node2(locus.getNode(_edge.nodeIndex2));
+                    const bool isEdgeFilterNode2(node2.size()>_graphNodeMaxEdgeCount);
+                    if (isEdgeFilterNode2)
+                    {
+#ifdef DEBUG_EDGER
+                        log_os << "EDGER: advance filtering @ hc: " << _headCount << "\n";
+#endif
+                        isLastFiltered=true;
+                        continue;
+                    }
+                }
+
+                return;
+            }
+            ++_edge.nodeIndex1;
+            _edge.nodeIndex2=_edge.nodeIndex1; // the next node starts at its own index, so lower-indexed partners are not revisited
+        }
+        ++_edge.locusIndex;
+        _edge.nodeIndex1=0;
+        _edge.nodeIndex2=0;
+    }
+
+    assert(false && "advanceEdge: invalid state"); // not reached: the loop above only exits through return
+}
+
+/// select the next edge belonging to this bin's observation-count range
+/// \return false once _headCount shows that the bin is exhausted
+bool
+EdgeRetrieverBin::
+next()
+{
+#ifdef DEBUG_EDGER
+    log_os << "EDGER: start next hc: " << _headCount << "\n";
+#endif
+
+    if (_headCount >= _endCount) return false; // end of the bin was already reached by an earlier call
+
+    // first catch headCount up to the begin edge if required:
+    if (_headCount < _beginCount)
+    {
+        jumpToFirstEdge();
+#ifdef DEBUG_EDGER
+        log_os << "EDGER: jumped hc: " << _headCount << " " << _edge  << "\n";
+#endif
+    }
+    else
+    {
+        advanceEdge();
+#ifdef DEBUG_EDGER
+        log_os << "EDGER: advanced hc: " << _headCount << " " << _edge  << "\n";
+#endif
+    }
+
+    assert(_headCount >= _beginCount);
+    return (_headCount <= _endCount); // false when the edge just reached counts past the end of this bin
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverBin.hh b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverBin.hh
new file mode 100644
index 0000000..4974820
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverBin.hh
@@ -0,0 +1,66 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeRetriever.hh"
+
+
+/// provide an iterator over edges in a set of SV locus graphs
+///
+/// designed to allow parallelization of the graph processing by
+/// dividing iteration into a set of bins with similar total edge
+/// observation counts
+///
+struct EdgeRetrieverBin final : public EdgeRetriever
+{
+    /// \param[in] graphNodeMaxEdgeCount filtration parameter for skipping edges
+    ///            from highly connected nodes (set to zero to disable)
+    /// \param[in] binCount total number of parallel bins, must be 1 or greater
+    /// \param[in] binIndex parallel bin id, must be less than binCount
+    EdgeRetrieverBin(
+        const SVLocusSet& set,
+        const unsigned graphNodeMaxEdgeCount,
+        const unsigned binCount,
+        const unsigned binIndex);
+
+    bool
+    next() override;
+
+private:
+    void
+    jumpToFirstEdge();
+
+    void
+    advanceEdge();
+
+    /// _beginCount and _endCount provide the observation range for the bin we're retrieving.
+    /// these values should be constant following the ctor
+    unsigned long _beginCount;
+    unsigned long _endCount;
+
+    /// _headCount is a tracking index of cumulative observation count as we step through
+    /// the graph
+    unsigned long _headCount;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverJumpBin.cpp b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverJumpBin.cpp
new file mode 100644
index 0000000..27add6a
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverJumpBin.cpp
@@ -0,0 +1,163 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "EdgeRetrieverJumpBin.hh"
+
+#include <cassert>
+
+//#define DEBUG_EDGER
+
+#ifdef DEBUG_EDGER
+#include <iostream>
+#include "blt_util/log.hh"
+#endif
+
+/// start with an empty tally for each of the binCount bins and derive the per-bin
+/// observation budget (_avgBinTotalCount) from the total observation count of the set
+EdgeRetrieverJumpBin::
+EdgeRetrieverJumpBin(
+    const SVLocusSet& set,
+    const unsigned graphNodeMaxEdgeCount,
+    const unsigned binCount,
+    const unsigned binIndex) :
+    EdgeRetriever(set,graphNodeMaxEdgeCount),
+    _binCount(binCount),
+    _binIndex(binIndex),
+    _edgeIndex(0),
+    _avgBinTotalCount(0),
+    _binTotalCount(_binCount,0)
+{
+    assert(binCount > 0);
+    assert(binIndex < binCount);
+
+    const unsigned long totalObservationCount(_set.totalObservationCount());
+    _avgBinTotalCount=(1+totalObservationCount/_binCount); // integer average plus one, so the budgets of all bins add up to more than the total
+
+#ifdef DEBUG_EDGER
+    log_os << "EDGER: bi,bc,avg: "
+           << binIndex << " "
+           << binCount << " "
+           << _avgBinTotalCount << "\n";
+#endif
+}
+
+/// step _edge to the next unfiltered edge assigned to bin _binIndex, replaying the bin assignment
+/// of every edge passed on the way; _edge.locusIndex reaches _set.size() when no edge is left
+void
+EdgeRetrieverJumpBin::
+advanceEdge()
+{
+    typedef SVLocusEdgesType::const_iterator edgeiter_t;
+
+    const bool isFilterNodes(_graphNodeMaxEdgeCount>0);
+
+    // advance to next edge unless this is the first iteration:
+    if (0 != _edgeIndex) _edge.nodeIndex2++;
+
+    while (_edge.locusIndex < _set.size())
+    {
+        const SVLocus& locus(_set.getLocus(_edge.locusIndex));
+        while (_edge.nodeIndex1<locus.size())
+        {
+            const SVLocusNode& node1(locus.getNode(_edge.nodeIndex1));
+            const bool isEdgeFilterNode1(isFilterNodes && (node1.size()>_graphNodeMaxEdgeCount));
+            const SVLocusEdgeManager node1Manager(node1.getEdgeManager());
+            edgeiter_t edgeIter(node1Manager.getMap().lower_bound(_edge.nodeIndex2));
+            const edgeiter_t edgeIterEnd(node1Manager.getMap().cend());
+
+            for (; edgeIter != edgeIterEnd; ++edgeIter)
+            {
+                _edge.nodeIndex2 = edgeIter->first;
+
+                // if both nodes have high edge counts we filter out the edge:
+                if (isEdgeFilterNode1)
+                {
+                    const SVLocusNode& node2(locus.getNode(_edge.nodeIndex2));
+                    const bool isEdgeFilterNode2(node2.size()>_graphNodeMaxEdgeCount);
+                    if (isEdgeFilterNode2)
+                    {
+#ifdef DEBUG_EDGER
+                        log_os << "EDGER: advance filtering @ index: " << _edgeIndex << "\n";
+#endif
+                        continue;
+                    }
+                }
+
+                // round-robin start bin for this edge, moved on to the next bin still under budget:
+                const unsigned firstTargetBin(_edgeIndex%_binCount);
+                unsigned targetBin(firstTargetBin);
+
+                do
+                {
+                    if (_binTotalCount[targetBin] < _avgBinTotalCount) break;
+                    targetBin=((targetBin+1)%_binCount);
+                }
+                while (targetBin != firstTargetBin);
+
+#ifdef DEBUG_EDGER
+                log_os << "EDGER: edgeIndex,ftarget,target,binIndex " << _edgeIndex << " " << firstTargetBin << " " << targetBin << " " << _binIndex << "\n";
+#endif
+
+                _edgeIndex++;
+
+                // get edge count:
+                unsigned edgeCount(edgeIter->second.getCount());
+                {
+                    const bool isSelfEdge(edgeIter->first == _edge.nodeIndex1);
+                    if (! isSelfEdge) edgeCount += locus.getEdge(edgeIter->first,_edge.nodeIndex1).getCount();
+                }
+
+                // charge the edge to its bin even when that bin is not ours: every bin's retriever must
+                // replay the same tallies, otherwise overflow edges of a full bin are claimed by nobody
+                _binTotalCount[targetBin] += edgeCount;
+                if (targetBin == _binIndex) return;
+            }
+            ++_edge.nodeIndex1;
+            _edge.nodeIndex2=_edge.nodeIndex1;
+        }
+        ++_edge.locusIndex;
+        _edge.nodeIndex1=0;
+        _edge.nodeIndex2=0;
+    }
+}
+
+/// select the next edge assigned to this bin
+/// \return false once the iteration has moved past the last locus of the set
+bool
+EdgeRetrieverJumpBin::
+next()
+{
+#ifdef DEBUG_EDGER
+    log_os << "EDGER: start index: " << _edgeIndex << "\n";
+#endif
+
+    if (_edge.locusIndex >= _set.size()) return false; // already finished on an earlier call (or the set is empty)
+
+    advanceEdge();
+#ifdef DEBUG_EDGER
+    log_os << "EDGER: advanced index: " << _edgeIndex << " " << _edge  << "\n";
+#endif
+
+    return (_edge.locusIndex < _set.size()); // advanceEdge() leaves locusIndex at _set.size() when it found no further edge
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverJumpBin.hh b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverJumpBin.hh
new file mode 100644
index 0000000..6289c2f
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverJumpBin.hh
@@ -0,0 +1,70 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeRetriever.hh"
+
+
+/// WARNING -- initial testing suggests this class still has a possible edge repetition/dropout bug
+///            this still has potential but only if you have time to go in and hunt down the bug
+
+
+/// provide an iterator over edges in a set of SV locus graphs
+///
+/// designed to allow parallelization of the graph processing by
+/// dividing iteration into a set of bins with similar total edge
+/// observation counts
+///
+/// the contents of the bins are designed to be distributed evenly over the sequence of edges
+///
+struct EdgeRetrieverJumpBin : public EdgeRetriever
+{
+    /// \param[in] graphNodeMaxEdgeCount filtration parameter for skipping edges from highly connected nodes (set to zero to disable)
+    /// \param[in] binCount total number of parallel bins, must be 1 or greater
+    /// \param[in] binIndex parallel bin id, must be less than binCount
+    EdgeRetrieverJumpBin(
+        const SVLocusSet& set,
+        const unsigned graphNodeMaxEdgeCount,
+        const unsigned binCount,
+        const unsigned binIndex);
+
+    bool
+    next() override;
+
+private:
+    void
+    advanceEdge();
+
+    typedef unsigned long count_t;
+
+    unsigned _binCount; ///< total number of bins
+    unsigned _binIndex; ///< the bin whose edges this object returns
+    count_t _edgeIndex; ///< number of unfiltered edges examined so far, also picks each edge's round-robin start bin
+
+    // additional 'load balancing' structures:
+    count_t _avgBinTotalCount; ///< observation count at which a bin is treated as full
+
+    std::vector<count_t> _binTotalCount; ///< observation count tallied so far, indexed by bin
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverLocus.cpp b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverLocus.cpp
new file mode 100644
index 0000000..df89cc1
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverLocus.cpp
@@ -0,0 +1,150 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "EdgeRetrieverLocus.hh"
+
+#include <cassert>
+
+#include <iostream>
+
+//#define DEBUG_EDGER
+
+#ifdef DEBUG_EDGER
+#include "blt_util/log.hh"
+#endif
+
+/// construct an edge iterator restricted to the single locus selected by opt.locusIndex
+/// (the index is asserted to be smaller than the number of loci in set)
+EdgeRetrieverLocus::
+EdgeRetrieverLocus(
+    const SVLocusSet& set,
+    const unsigned graphNodeMaxEdgeCount,
+    const LocusEdgeOptions& opt) :
+    EdgeRetriever(set, graphNodeMaxEdgeCount),
+    _opt(opt),
+    _isInit(false) // the first advanceEdge() call positions the iterator at the start of the locus
+{
+    assert(_opt.locusIndex<set.size());
+}
+
+/// this filter enables a special option to filter down to all edges connected to
+/// a single node or only the edge connecting two nodes:
+///
+/// \return true if the edge is outside of the requested node selection and should be skipped
+static
+bool
+isEdgeFiltered(
+    const LocusEdgeOptions& opt,
+    const EdgeInfo& edge)
+{
+    if (! opt.isNodeIndex1) return false; // no node selection requested, so every edge is kept
+    if (opt.isNodeIndex2)
+    {
+        // two nodes selected: keep only the edge joining them, in either orientation
+        const bool isMatch(
+            (edge.nodeIndex1 == opt.nodeIndex1) &&
+            (edge.nodeIndex2 == opt.nodeIndex2));
+        const bool isSwapMatch(
+            (edge.nodeIndex2 == opt.nodeIndex1) &&
+            (edge.nodeIndex1 == opt.nodeIndex2));
+
+        return (! (isMatch || isSwapMatch));
+    }
+    else
+    {
+        // one node selected: keep any edge which has that node at either end
+        const bool isMatch
+        (edge.nodeIndex1 == opt.nodeIndex1);
+        const bool isSwapMatch
+        (edge.nodeIndex2 == opt.nodeIndex1);
+
+        return (! (isMatch || isSwapMatch));
+    }
+}
+
+/// move _edge to the next unfiltered edge of the target locus; when no such edge
+/// remains, _edge.locusIndex is incremented past the target locus as an end marker
+void
+EdgeRetrieverLocus::
+advanceEdge()
+{
+    typedef SVLocusEdgesType::const_iterator edgeiter_t;
+
+    if (_isInit)
+    {
+        _edge.nodeIndex2++; // step past the edge returned by the previous call
+    }
+    else
+    {
+        // first call: start the scan from the first node pair of the requested locus
+        _edge.locusIndex=_opt.locusIndex;
+        _edge.nodeIndex1=0;
+        _edge.nodeIndex2=0;
+        _isInit=true;
+    }
+
+    const SVLocus& locus(_set.getLocus(_edge.locusIndex));
+    while (_edge.nodeIndex1<locus.size())
+    {
+        const SVLocusNode& node1(locus.getNode(_edge.nodeIndex1));
+        const bool isEdgeFilterNode1((_graphNodeMaxEdgeCount>0) && node1.size()>_graphNodeMaxEdgeCount); // always false when the max edge count is zero
+        const SVLocusEdgeManager node1Manager(node1.getEdgeManager());
+        edgeiter_t edgeIter(node1Manager.getMap().lower_bound(_edge.nodeIndex2)); // first edge of node1 whose key is not less than nodeIndex2
+        const edgeiter_t edgeIterEnd(node1Manager.getMap().cend());
+
+        for (; edgeIter != edgeIterEnd; ++edgeIter)
+        {
+            _edge.nodeIndex2 = edgeIter->first;
+
+            // check whether this edge is in the requested set:
+            if (isEdgeFiltered(_opt,_edge)) continue;
+
+            // check whether this is a noise edge that we skip:
+            if (isEdgeFilterNode1)
+            {
+                const SVLocusNode& node2(locus.getNode(_edge.nodeIndex2));
+                const bool isEdgeFilterNode2(node2.size()>_graphNodeMaxEdgeCount);
+                if (isEdgeFilterNode2) continue; // skipped only when both end nodes exceed the max edge count
+            }
+            return; // _edge now identifies the next edge to report
+        }
+        _edge.nodeIndex1++;
+        _edge.nodeIndex2=_edge.nodeIndex1; // restart at nodeIndex2 == nodeIndex1, so only edges with nodeIndex2 >= nodeIndex1 are visited
+    }
+
+    _edge.locusIndex++; // no edges left: next() detects this through locusIndex != _opt.locusIndex
+}
+
+/// advance to the next edge of the target locus
+/// \return true if _edge now holds a new edge, false once the locus has been exhausted
+bool
+EdgeRetrieverLocus::
+next()
+{
+#ifdef DEBUG_EDGER
+    log_os << "EDGERL: start\n";
+#endif
+
+    if (_edge.locusIndex > _opt.locusIndex) return false; // an earlier call already ran off the end of the locus
+    advanceEdge();
+
+    return (_edge.locusIndex == _opt.locusIndex); // advanceEdge() increments locusIndex when no edges remain
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverLocus.hh b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverLocus.hh
new file mode 100644
index 0000000..fd910f3
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRetrieverLocus.hh
@@ -0,0 +1,54 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeRetriever.hh"
+#include "EdgeOptions.hh"
+
+
+/// provide an iterator over the edges of a single SV locus graph
+///
+/// iteration is restricted to the locus selected by opt.locusIndex, and can
+/// optionally be narrowed to the edges connected to a single node, or to
+/// only the edge connecting two nodes
+///
+struct EdgeRetrieverLocus final : public EdgeRetriever
+{
+    /// \param opt selects the locus to iterate over, plus the optional node filter
+    EdgeRetrieverLocus(
+        const SVLocusSet& set,
+        const unsigned graphNodeMaxEdgeCount,
+        const LocusEdgeOptions& opt);
+
+    bool
+    next() override;
+
+private:
+    void
+    advanceEdge();
+
+    LocusEdgeOptions _opt; // private copy of the locus/node selection
+    bool _isInit; // false until the first advanceEdge() call has set the starting position
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRuntimeTracker.cpp b/src/c++/lib/applications/GenerateSVCandidates/EdgeRuntimeTracker.cpp
new file mode 100644
index 0000000..857a1c4
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRuntimeTracker.cpp
@@ -0,0 +1,93 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "EdgeRuntimeTracker.hh"
+
+#include "common/Exceptions.hh"
+
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+/// open the optional edge runtime log; an empty outputFile disables logging
+/// (throws illumina::common::LogicException if the file cannot be opened)
+EdgeRuntimeTracker::
+EdgeRuntimeTracker(
+    const std::string& outputFile) :
+    _osPtr(nullptr),
+    _cand(0),
+    _compCand(0),
+    _assmCand(0),
+    _assmCompCand(0)
+{
+    if (outputFile.empty()) return;
+    _osPtr = new std::ofstream(outputFile.c_str());
+    if (! *_osPtr)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Can't open output file: " << outputFile << '\n';
+        delete _osPtr; // the destructor does not run when a constructor throws, so release the stream here
+        BOOST_THROW_EXCEPTION(illumina::common::LogicException(oss.str()));
+    }
+    *_osPtr << std::setprecision(4); // precision applied to the floating point values written to the log
+}
+
+
+/// release the log stream, if one was opened by the constructor
+EdgeRuntimeTracker::
+~EdgeRuntimeTracker()
+{
+    delete _osPtr; // delete on a null pointer is a no-op, so no explicit check is needed
+}
+
+
+/// stop the edge timer and, for slow edges only, write one tab-delimited record to the runtime log
+void
+EdgeRuntimeTracker::
+stop(const EdgeInfo& edge)
+{
+    edgeTime.stop();
+    const double elapsedSec(edgeTime.getWallSeconds());
+
+    // the log exists to expose the most troublesome edges only, so anything below this wall time is dropped:
+    static const double minLogTime(0.5);
+    if (! (elapsedSec >= minLogTime)) return;
+
+    // no output file was configured, so there is nowhere to write the record:
+    if (nullptr == _osPtr) return;
+
+    std::ostream& os(*_osPtr);
+    edge.write(os);
+    os << '\t' << elapsedSec
+       << '\t' << _cand
+       << '\t' << _compCand
+       << '\t' << _assmCand
+       << '\t' << _assmCompCand
+       << '\t' << candTime.getWallSeconds()
+       << '\t' << assmTime.getWallSeconds()
+       << '\t' << remoteTime.getWallSeconds()
+       << '\t' << scoreTime.getWallSeconds()
+       << '\n';
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/EdgeRuntimeTracker.hh b/src/c++/lib/applications/GenerateSVCandidates/EdgeRuntimeTracker.hh
new file mode 100644
index 0000000..d54e27c
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/EdgeRuntimeTracker.hh
@@ -0,0 +1,96 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/time_util.hh"
+#include "svgraph/EdgeInfo.hh"
+
+#include "boost/utility.hpp"
+
+
+#include <iosfwd>
+
+
+
+/// simple edge time tracker and reporter
+struct EdgeRuntimeTracker : private boost::noncopyable
+{
+    explicit
+    EdgeRuntimeTracker(const std::string& outputFile); // an empty outputFile disables the runtime log
+
+    ~EdgeRuntimeTracker();
+
+    void
+    start() // reset every timer and counter, then start timing a new edge
+    {
+        edgeTime.clear();
+        candTime.clear();
+        assmTime.clear();
+        scoreTime.clear();
+        remoteTime.clear();
+
+        edgeTime.resume(); // only the whole-edge timer is started here
+        _cand = 0;
+        _compCand = 0;
+        _assmCand = 0;
+        _assmCompCand = 0;
+    }
+
+    void
+    stop(const EdgeInfo& edge); // stop the edge timer and log the edge if it ran long enough
+
+    CpuTimes
+    getLastEdgeTime() const
+    {
+        return edgeTime.getTimes();
+    }
+
+    void
+    addCand(const bool isComplex) // count one candidate, complex and non-complex are tallied separately
+    {
+        if (isComplex) _compCand++;
+        else           _cand++;
+    }
+
+    void
+    addAssm(const bool isComplex) // count one assembled candidate, complex and non-complex are tallied separately
+    {
+        if (isComplex) _assmCompCand++;
+        else           _assmCand++;
+    }
+
+    TimeTracker candTime; // public phase timers reported by stop(); NOTE(review): presumably driven by the caller -- confirm
+    TimeTracker assmTime;
+    TimeTracker scoreTime;
+    TimeTracker remoteTime;
+private:
+    std::ostream* _osPtr; // owned log stream, null when logging is disabled
+    TimeTracker edgeTime; // wall/cpu time of the edge currently being processed
+
+    unsigned _cand; // per-edge counters, reset by start()
+    unsigned _compCand;
+    unsigned _assmCand;
+    unsigned _assmCompCand;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/FatSVCandidate.cpp b/src/c++/lib/applications/GenerateSVCandidates/FatSVCandidate.cpp
new file mode 100644
index 0000000..0153278
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/FatSVCandidate.cpp
@@ -0,0 +1,45 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "FatSVCandidate.hh"
+
+#include <iostream>
+
+/// print the base SVCandidate summary followed by, for each evidence type,
+/// the number of evidence index entries stored for breakend 1 and breakend 2
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const FatSVCandidate& svc)
+{
+    os << static_cast<const SVCandidate&>(svc); // reference cast selects the base-class printer without a sliced temporary copy
+    for (unsigned eIndex(0); eIndex<SVEvidenceType::SIZE; ++eIndex)
+    {
+        os << "Index count for Etype: " << SVEvidenceType::label(eIndex)
+           << " bp1: " << svc.bp1EvidenceIndex[eIndex].size()
+           << " bp2: " << svc.bp2EvidenceIndex[eIndex].size()
+           << "\n";
+    }
+    return os;
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/FatSVCandidate.hh b/src/c++/lib/applications/GenerateSVCandidates/FatSVCandidate.hh
new file mode 100644
index 0000000..61c3bf5
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/FatSVCandidate.hh
@@ -0,0 +1,116 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVCandidate.hh"
+
+#include <array>
+#include <iosfwd>
+
+/// extend vector A with the contents of B
+///
+/// example:
+/// vector<int> A = {1,2};
+/// vector<int> B = {2,3};
+/// appendVec(A,B);
+/// assert(A == {1,2,2,3});
+/// note: A and B must be distinct objects, a range insert from the container itself is not valid for std::vector
+template <typename Vec>
+void
+appendVec(
+    Vec& A,
+    const Vec& B)
+{
+    A.insert( A.end(), B.begin(), B.end() );
+}
+
+
+
+/// an SV candidate with additional details pertaining to input read evidence which is useful for filtration
+///
+struct FatSVCandidate : public SVCandidate
+{
+    typedef SVCandidate base_t;
+
+    FatSVCandidate()
+        : base_t()
+    {}
+
+    explicit
+    FatSVCandidate(const SVCandidate& copy) // copies the base candidate only, both evidence index arrays start out empty
+        : base_t(copy)
+    {}
+
+    FatSVCandidate(const FatSVCandidate&) = default;
+    FatSVCandidate& operator=(const FatSVCandidate&) = default;
+
+    /// merge rhs into this candidate: the base-class merge runs first and rhs's evidence indices are appended only if it succeeds
+    /// \return false if the base-class merge returns false (the evidence indices are then left unchanged), true otherwise
+    bool
+    merge(
+        const FatSVCandidate& rhs,
+        const bool isExpandRegion = true)
+    {
+        if (! base_t::merge(rhs, isExpandRegion)) return false;
+        for (unsigned evidenceTypeIndex(0); evidenceTypeIndex<SVEvidenceType::SIZE; ++evidenceTypeIndex)
+        {
+            appendVec(bp1EvidenceIndex[evidenceTypeIndex],rhs.bp1EvidenceIndex[evidenceTypeIndex]);
+            appendVec(bp2EvidenceIndex[evidenceTypeIndex],rhs.bp2EvidenceIndex[evidenceTypeIndex]);
+        }
+        return true;
+    }
+
+#if 0
+    bool
+    merge(const SVCandidate& rhs)
+    {
+        if (! base_t::merge(rhs)) return false;
+
+        return true;
+    }
+#endif
+
+#if 0
+    void
+    clear()
+    {
+        base_t::clear();
+        for (auto& evi : bp1EvidenceIndex) evi.clear();
+        for (auto& evi : bp2EvidenceIndex) evi.clear();
+    }
+#endif
+
+    /// a 2d array type to track breakpoint evidence, the first dimension is evidence type
+    /// and the inner dimension is a vector with size equal to the number of (confident-mapping) observations.
+    /// For each observation the inner-dimension value provides the index of the read used as an observation, which
+    /// can be used to estimate signal density vs. all reads.
+    typedef std::array<std::vector<double>,SVEvidenceType::SIZE> evidenceIndex_t;
+
+    evidenceIndex_t bp1EvidenceIndex; // evidence read indices for breakend 1, one vector per evidence type
+    evidenceIndex_t bp2EvidenceIndex; // evidence read indices for breakend 2, one vector per evidence type
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const FatSVCandidate& svc);
diff --git a/src/c++/lib/applications/GenerateSVCandidates/GSCEdgeStatsManager.cpp b/src/c++/lib/applications/GenerateSVCandidates/GSCEdgeStatsManager.cpp
new file mode 100644
index 0000000..18c5249
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/GSCEdgeStatsManager.cpp
@@ -0,0 +1,64 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "GSCEdgeStatsManager.hh"
+#include "common/Exceptions.hh"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+/// open the optional edge stats output file and start the lifetime timer; an empty outputFile
+/// disables stats collection (throws illumina::common::LogicException if the file cannot be opened)
+GSCEdgeStatsManager::
+GSCEdgeStatsManager(
+    const std::string& outputFile)
+    : _osPtr(nullptr)
+{
+    if (outputFile.empty()) return;
+    _osPtr = new std::ofstream(outputFile.c_str());
+    if (! *_osPtr)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Can't open output file: " << outputFile << '\n';
+        delete _osPtr; // the destructor does not run when a constructor throws, so release the stream here
+        BOOST_THROW_EXCEPTION(illumina::common::LogicException(oss.str()));
+    }
+    lifeTime.resume(); // the lifetime timer only runs when stats output is enabled
+}
+
+/// when stats output is enabled: stop the lifetime timer, record it in the stats,
+/// save the accumulated stats to the output stream and release the stream
+GSCEdgeStatsManager::
+~GSCEdgeStatsManager()
+{
+    if (_osPtr != nullptr)
+    {
+        lifeTime.stop();
+        edgeStats.edgeData.lifeTime=lifeTime.getTimes();
+        edgeStats.save(*_osPtr); // NOTE(review): runs inside a destructor -- confirm save() cannot throw
+        delete _osPtr;
+        _osPtr = nullptr;
+    }
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/GSCEdgeStatsManager.hh b/src/c++/lib/applications/GenerateSVCandidates/GSCEdgeStatsManager.hh
new file mode 100644
index 0000000..685a5da
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/GSCEdgeStatsManager.hh
@@ -0,0 +1,138 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeRuntimeTracker.hh"
+#include "appstats/GSCEdgeStats.hh"
+#include "blt_util/time_util.hh"
+#include "svgraph/EdgeInfo.hh"
+
+#include "boost/utility.hpp"
+
+#include <iosfwd>
+#include <string>
+
+
+/// handles all messy real world interaction for the stats module (opening the
+/// output file, timing the run and saving the result); the stats module itself
+/// just accumulates data
+struct GSCEdgeStatsManager : private boost::noncopyable
+{
+    explicit
+    GSCEdgeStatsManager(
+        const std::string& outputFile);
+
+    ~GSCEdgeStatsManager();
+
+    void
+    updateEdgeCandidates(
+        const EdgeInfo& edge,
+        const unsigned candCount,
+        const SVFinderStats& finderStats)
+    {
+        if (_osPtr == nullptr) return; // stats are only accumulated when an output stream is open
+
+        GSCEdgeGroupStats& gStats(getStatsGroup(edge));
+        gStats.totalInputEdgeCount++;
+        gStats.totalCandidateCount+=candCount;
+        gStats.candidatesPerEdge.increment(candCount);
+        gStats.finderStats.merge(finderStats);
+    }
+
+    void
+    updateMJFilter(
+        const EdgeInfo& edge,
+        const unsigned mjComplexCount,
+        const unsigned mjSpanningFilterCount)
+    {
+        if (_osPtr == nullptr) return;
+
+        GSCEdgeGroupStats& gStats(getStatsGroup(edge));
+        gStats.totalComplexCandidate += mjComplexCount;
+        gStats.totalSpanningCandidateFilter += mjSpanningFilterCount;
+    }
+
+    void
+    updateJunctionCandidates(
+        const EdgeInfo& edge,
+        const unsigned junctionCount,
+        const bool isComplex)
+    {
+        if (_osPtr == nullptr) return;
+
+        GSCEdgeGroupStats& gStats(getStatsGroup(edge));
+        gStats.totalJunctionCount+=junctionCount;
+        if (isComplex) gStats.totalComplexJunctionCount+=junctionCount;
+        gStats.breaksPerJunction.increment(junctionCount);
+    }
+
+    void
+    updateAssemblyCount(
+        const EdgeInfo& edge,
+        const unsigned assemblyCount,
+        const bool isSpanning,
+        const bool isOverlapSkip = false)
+    {
+        if (_osPtr == nullptr) return;
+
+        GSCEdgeGroupStats& gStats(getStatsGroup(edge));
+        gStats.totalAssemblyCandidates += assemblyCount;
+        if (isSpanning) gStats.totalSpanningAssemblyCandidates += assemblyCount;
+        if (isOverlapSkip)
+        {
+            gStats.totalJunctionAssemblyOverlapSkips++; // overlap skips are counted but kept out of the per-junction distribution
+        }
+        else
+        {
+            gStats.assemblyCandidatesPerJunction.increment(assemblyCount);
+        }
+    }
+
+    void
+    updateScoredEdgeTime(
+        const EdgeInfo& edge,
+        const EdgeRuntimeTracker& edgeTracker)
+    {
+        if (_osPtr == nullptr) return;
+
+        GSCEdgeGroupStats& gStats(getStatsGroup(edge));
+        gStats.totalTime.merge(edgeTracker.getLastEdgeTime());
+        gStats.candTime.merge(edgeTracker.candTime.getTimes());
+        gStats.assemblyTime.merge(edgeTracker.assmTime.getTimes());
+        gStats.scoringTime.merge(edgeTracker.scoreTime.getTimes()); // NOTE(review): edgeTracker.remoteTime is not merged here -- confirm this is intended
+    }
+
+private:
+    GSCEdgeGroupStats&
+    getStatsGroup(
+        const EdgeInfo& edge)
+    {
+        return (edge.isSelfEdge() ? edgeStats.edgeData.selfEdges : edgeStats.edgeData.remoteEdges); // self edges and remote edges are accumulated separately
+    }
+
+    std::ostream* _osPtr; // owned output stream, null when stats output is disabled
+    TimeTracker lifeTime; // started by the constructor and stopped by the destructor when output is enabled
+    GSCEdgeStats edgeStats; // accumulated stats, saved to *_osPtr by the destructor
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/GSCOptions.cpp b/src/c++/lib/applications/GenerateSVCandidates/GSCOptions.cpp
new file mode 100644
index 0000000..924e274
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/GSCOptions.cpp
@@ -0,0 +1,222 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "GSCOptions.hh"
+#include "EdgeOptionsParser.hh"
+
+#include "blt_util/log.hh"
+#include "common/ProgramUtil.hh"
+#include "options/AlignmentFileOptionsParser.hh"
+#include "options/ReadScannerOptionsParser.hh"
+#include "options/optionsUtil.hh"
+
+#include "boost/program_options.hpp"
+
+/// print usage for this program by forwarding to the six-argument usage() overload with a fixed
+/// program description; msg is an optional error message (NOTE(review): overload presumably exits -- confirm)
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    usage(os, prog, visible, "call candidates from an SV Locus graph", "", msg);
+}
+
+/// run checkStandardizeInputFile() on filename (which may be modified in place) and report any error message through usage()
+static
+void
+checkStandardizeUsageFile(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    std::string& filename,
+    const char* fileLabel)
+{
+    std::string errorMsg;
+    if ( checkStandardizeInputFile(filename, fileLabel, errorMsg)) // NOTE(review): a true return appears to signal failure, with the reason in errorMsg -- confirm
+    {
+        usage(os,prog,visible,errorMsg.c_str());
+    }
+}
+
+/// parse the GenerateSVCandidates command line into opt; every parse or validation problem is
+/// reported through usage() (NOTE(review): usage() presumably terminates the program -- confirm)
+void
+parseGSCOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    GSCOptions& opt)
+{
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("graph-file", po::value(&opt.graphFilename),
+     "sv locus graph file (required)")
+    ("align-stats", po::value(&opt.statsFilename),
+     "pre-computed alignment statistics for the input alignment files (required)")
+    ("chrom-depth", po::value(&opt.chromDepthFilename),
+     "average depth estimate for each chromosome")
+    ("ref", po::value(&opt.referenceFilename),
+     "fasta reference sequence (required)")
+    ("truth-vcf", po::value(&opt.truthVcfFilename),
+     "optional truth VCF file (for testing)")
+    ("edge-runtime-log", po::value(&opt.edgeRuntimeFilename),
+     "optionally log time for long-running edges to this file")
+    ("edge-stats-log", po::value(&opt.edgeStatsFilename),
+     "optionally log aggregate edge statistics to this file")
+    ("candidate-output-file", po::value(&opt.candidateOutputFilename),
+     "Write SV candidates to file (required)")
+    ("diploid-output-file", po::value(&opt.diploidOutputFilename),
+     "Write germline diploid SVs to file (at least one non-tumor alignment file must be specified)")
+    ("somatic-output-file", po::value(&opt.somaticOutputFilename),
+     "Write somatic SVs to file (at least one tumor and non-tumor alignment file must be specified)")
+    ("tumor-output-file", po::value(&opt.tumorOutputFilename),
+     "Write tumor SVs to file (at least one tumor alignment file must be specified)")
+    ("verbose", po::value(&opt.isVerbose)->zero_tokens(),
+     "Turn on low-detail INFO logging.")
+    ("skip-assembly", po::value(&opt.isSkipAssembly)->zero_tokens(),
+     "Turn off all breakend and small-variant assembly. Only large, imprecise variants will be reported based on anomalous read pairs.")
+    ("skip-scoring", po::value(&opt.isSkipScoring)->zero_tokens(),
+     "Turn off all scoring models and output candidates only.")
+    ("skip-remote-reads", po::value(&opt.isSkipRemoteReads)->zero_tokens(),
+     "Turn off remote mapq0 read search for assembly (reduces assembly success for insertions/mobile elements).")
+    ("rna", po::value(&opt.isRNA)->zero_tokens(),
+     "For RNA input. Skip small deletions and modify diploid scoring.")
+    ("unstranded", po::value(&opt.isUnstrandedRNA)->zero_tokens(),
+     "For RNA input. Is data stranded?.") // NOTE(review): help text reads as a question although the flag sets opt.isUnstrandedRNA -- consider rewording
+    ("min-candidate-spanning-count", po::value(&opt.minCandidateSpanningCount)->default_value(opt.minCandidateSpanningCount),
+     "minimum number of supporting spanning observations required to become an SV candidate")
+    ("min-scored-sv-size", po::value(&opt.minScoredVariantSize)->default_value(opt.minScoredVariantSize),
+     "minimum size for variants which are scored and output following initial candidate generation")
+    ("evidence-bam-stub", po::value(&opt.supportBamStub)->default_value(opt.supportBamStub),
+     "Directory and prefix of bams storing the supporting reads of SVs")
+    ;
+
+    po::options_description alignDesc(getOptionsDescription(opt.alignFileOpt));
+    po::options_description edgeDesc(getOptionsDescription(opt.edgeOpt));
+    po::options_description scanDesc(getOptionsDescription(opt.scanOpt));
+    po::options_description diploidCallDesc(getOptionsDescription(opt.diploidOpt));
+    po::options_description somaticCallDesc(getOptionsDescription(opt.somaticOpt));
+    po::options_description tumorCallDesc(getOptionsDescription(opt.tumorOpt));
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(alignDesc).add(scanDesc).add(req).add(edgeDesc).add(diploidCallDesc).add(somaticCallDesc).add(tumorCallDesc).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm); // xor toggles the allow_short bit of unix_style
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        usage(log_os,prog,visible);
+    }
+
+    std::string errorMsg;
+    if (parseOptions(vm, opt.edgeOpt, errorMsg)) // NOTE(review): a true return appears to signal an error described in errorMsg; the first hit ends the chain -- confirm
+    {}
+    else if (parseOptions(vm, opt.alignFileOpt, errorMsg))
+    {}
+    else if (parseOptions(vm, opt.scanOpt, errorMsg))
+    {}
+    else if (opt.statsFilename.empty())
+    {
+        errorMsg="Need the alignment stats file";
+    }
+    else if (opt.referenceFilename.empty())
+    {
+        errorMsg="Need the FASTA reference file";
+    }
+
+    if (! errorMsg.empty()) usage(log_os,prog,visible,errorMsg.c_str());
+
+    checkStandardizeUsageFile(log_os,prog,visible,opt.graphFilename,"SV locus graph");
+    checkStandardizeUsageFile(log_os,prog,visible,opt.referenceFilename,"reference fasta");
+    checkStandardizeUsageFile(log_os,prog,visible,opt.statsFilename,"alignment statistics");
+
+    if (! opt.chromDepthFilename.empty()) // the chromosome depth file is optional and only checked when given
+    {
+        checkStandardizeUsageFile(log_os,prog,visible,opt.chromDepthFilename,"chromosome depth");
+    }
+    if (opt.candidateOutputFilename.empty())
+    {
+        usage(log_os,prog,visible,"Must specify candidate output file");
+    }
+
+    {
+        // count tumor and non-tumor alignment files to validate the requested output types below
+        unsigned normalCount(0);
+        unsigned tumorCount(0);
+        for (const bool value : opt.alignFileOpt.isAlignmentTumor)
+        {
+            if (value) tumorCount++;
+            else      normalCount++;
+        }
+
+        /*if (opt.diploidOutputFilename.empty())
+        {
+        	usage(log_os,prog,visible,"Must specify diploid output file");
+        }*/
+
+        if (! opt.diploidOutputFilename.empty())
+        {
+            if (normalCount==0)
+            {
+                usage(log_os,prog,visible,"Must specify at least one non-tumor alignment file for diploid output");
+            }
+        }
+
+        if (! opt.somaticOutputFilename.empty())
+        {
+            if ((normalCount==0) || (tumorCount==0))
+            {
+                usage(log_os,prog,visible,"Must specify at least one tumor and non-tumor alignment file for somatic output");
+            }
+        }
+
+        if (! opt.tumorOutputFilename.empty())
+        {
+            if (tumorCount==0)
+            {
+                usage(log_os,prog,visible,"Must specify at least one tumor alignment file for tumor output");
+            }
+        }
+    }
+}
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/GSCOptions.hh b/src/c++/lib/applications/GenerateSVCandidates/GSCOptions.hh
new file mode 100644
index 0000000..1e1f6b5
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/GSCOptions.hh
@@ -0,0 +1,88 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeOptions.hh"
+#include "common/Program.hh"
+#include "options/AlignmentFileOptions.hh"
+#include "options/CallOptionsDiploid.hh"
+#include "options/CallOptionsShared.hh"
+#include "options/CallOptionsSomatic.hh"
+#include "options/CallOptionsTumor.hh"
+#include "options/ReadScannerOptions.hh"
+#include "options/SVRefinerOptions.hh"
+
+#include <string>
+#include <vector>
+
+/// all options of the GenerateSVCandidates program (filled in by parseGSCOptions)
+struct GSCOptions
+{
+    AlignmentFileOptions alignFileOpt; ///< input alignment files and their tumor/non-tumor flags
+    EdgeOptions edgeOpt; ///< selects the graph edges to process (a single locus, or one bin out of binCount)
+    ReadScannerOptions scanOpt; ///< passed to the SVLocusScanner
+    SVRefinerOptions refineOpt;
+    CallOptionsShared callOpt;
+    CallOptionsDiploid diploidOpt;
+    CallOptionsSomatic somaticOpt;
+    CallOptionsTumor tumorOpt;
+
+    std::string graphFilename; ///< SV locus graph file
+    std::string referenceFilename; ///< reference fasta file
+    std::string statsFilename; ///< alignment statistics file
+    std::string chromDepthFilename; ///< chromosome depth file (may be empty)
+    std::string truthVcfFilename;
+    std::string edgeRuntimeFilename; ///< passed to the EdgeRuntimeTracker
+    std::string edgeStatsFilename; ///< passed to the GSCEdgeStatsManager
+
+    std::string candidateOutputFilename; ///< candidate output file (required)
+    std::string diploidOutputFilename; ///< diploid output file (optional; needs a non-tumor alignment file)
+    std::string somaticOutputFilename; ///< somatic output file (optional; needs tumor and non-tumor alignment files)
+    std::string tumorOutputFilename; ///< tumor output file (optional; needs a tumor alignment file)
+    std::string supportBamStub; ///< if non-empty, supporting reads are written to <stub>.bam_<sampleIndex>.bam
+
+    bool isVerbose = false; ///< provide some high-level log info to assist in debugging
+
+    bool isSkipAssembly = false; ///< if true, skip assembly and run a low-resolution, breakdancer-like subset of the workflow
+
+    bool isSkipScoring = false; ///< if true, skip quality scoring and output candidates only
+
+    bool isSkipRemoteReads = false; ///< if true, don't search for non-local mapq0 mate pairs for assembly
+
+    bool isRNA = false; ///< if true, RNA specific filtering on candidates and diploid scoring is used
+
+    bool isUnstrandedRNA = false; ///< For unstranded RNA data, the direction of fusion transcripts is unknown
+
+    unsigned minCandidateSpanningCount = 3; ///< how many spanning evidence observations are required to become a candidate?
+
+    unsigned minScoredVariantSize = 51; ///< min size for scoring and scored output following candidate generation
+};
+
+/// presumably fills opt from the command line in argc/argv (implementation not shown here); runInternal passes the program object as prog
+void
+parseGSCOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    GSCOptions& opt);
diff --git a/src/c++/lib/applications/GenerateSVCandidates/GenerateSVCandidates.cpp b/src/c++/lib/applications/GenerateSVCandidates/GenerateSVCandidates.cpp
new file mode 100644
index 0000000..269a856
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/GenerateSVCandidates.cpp
@@ -0,0 +1,297 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "GenerateSVCandidates.hh"
+#include "EdgeRetrieverBin.hh"
+#include "EdgeRetrieverLocus.hh"
+#include "GSCOptions.hh"
+#include "SVCandidateProcessor.hh"
+#include "SVFinder.hh"
+#include "SVSupports.hh"
+
+#include "blt_util/log.hh"
+#include "common/Exceptions.hh"
+#include "manta/MultiJunctionUtil.hh"
+#include "manta/SVCandidateUtil.hh"
+
+#include <iostream>
+#include <string>
+
+//#define DEBUG_GSV
+
+
+/// write a description of an edge and of both of its nodes to os (used for verbose logging and as exception context):
+static
+void
+dumpEdgeInfo(
+    const EdgeInfo& edge,
+    const SVLocusSet& set,
+    std::ostream& os)
+{
+    os << edge;
+    const auto& edgeLocus(set.getLocus(edge.locusIndex));
+    os << "\tnode1:" << edgeLocus.getNode(edge.nodeIndex1) << "\tnode2:" << edgeLocus.getNode(edge.nodeIndex2);
+}
+
+
+
+/// we can either traverse all edges in a single locus (disjoint subgraph) of the graph
+/// OR
+/// traverse all edges in one "bin" -- that is, one out of binCount subsets of the total
+/// graph edges. Each bin is designed to be of roughly equal size in terms of total
+/// anticipated workload, so that we have good parallel processing performance.
+///
+static
+EdgeRetriever*
+edgeRFactory(
+    const SVLocusSet& set,
+    const EdgeOptions& opt)
+{
+    // The retriever is heap-allocated and handed back as a raw pointer, so the
+    // caller is responsible for taking ownership of it.
+    if (! opt.isLocusIndex)
+    {
+        // bin mode: traverse bin opt.binIndex out of opt.binCount
+        return new EdgeRetrieverBin(set, opt.graphNodeMaxEdgeCount, opt.binCount, opt.binIndex);
+    }
+    return new EdgeRetrieverLocus(set, opt.graphNodeMaxEdgeCount, opt.locusOpt);
+}
+
+
+/// groups the candidate SVs of an edge into multi-junction candidates and records the filter counts (TODO temporarily shoved here, needs a better home):
+struct MultiJunctionFilter
+{
+    MultiJunctionFilter(
+        const GSCOptions& opt,
+        GSCEdgeStatsManager& edgeStatMan)
+        : _opt(opt),
+          _edgeStatMan(edgeStatMan)
+    {}
+
+    void
+    filterGroupCandidateSV(
+        const EdgeInfo& edge,
+        const std::vector<SVCandidate>& svs,
+        std::vector<SVMultiJunctionCandidate>& mjSVs)
+    {
+        unsigned mjComplexCount(0); // presumably filled in by findMultiJunctionCandidates below
+        unsigned mjSpanningFilterCount(0); // presumably filled in by findMultiJunctionCandidates below
+        findMultiJunctionCandidates(svs, _opt.minCandidateSpanningCount, mjComplexCount, mjSpanningFilterCount, mjSVs);
+        _edgeStatMan.updateMJFilter(edge, mjComplexCount, mjSpanningFilterCount);
+
+        if (_opt.isVerbose)
+        {
+            unsigned junctionCount(mjSVs.size()); // number of multi-junction groups
+            unsigned candidateCount(0); // total number of junctions over all groups
+            for (const SVMultiJunctionCandidate& mj : mjSVs)
+            {
+                candidateCount += mj.junction.size();
+            }
+            log_os << __FUNCTION__ << ": Low-resolution candidate filtration complete. "
+                   << "candidates: " << candidateCount << " "
+                   << "junctions: " << junctionCount << " "
+                   << "complex: " << mjComplexCount << " "
+                   << "spanningfilt: " << mjSpanningFilterCount << "\n";
+        }
+#ifdef DEBUG_GSV
+        log_os << __FUNCTION__ << ": final candidate list";
+        const unsigned junctionCount(mjSVs.size());
+        for (unsigned junctionIndex(0); junctionIndex< junctionCount; ++junctionIndex)
+        {
+            const auto& mj(mjSVs[junctionIndex]);
+            const unsigned junctionCandCount(mj.junction.size());
+            log_os << __FUNCTION__ << ": JUNCTION " << junctionIndex << " with " << junctionCandCount << " candidates\n";
+            for (unsigned junctionCandIndex(0); junctionCandIndex< junctionCandCount; ++junctionCandIndex)
+            {
+                log_os << __FUNCTION__ << ":  JUNCTION " << junctionIndex << " Candidate "
+                       << junctionCandIndex << " " << mj.junction[junctionCandIndex] << "\n";
+            }
+        }
+#endif
+    }
+
+private:
+    const GSCOptions& _opt; ///< held by reference, so the options object must outlive this filter
+    GSCEdgeStatsManager& _edgeStatMan; ///< held by reference; receives the per-edge filter counts
+};
+
+
+#if 0 // disabled design sketch: nothing in this block is compiled
+/// edge indices+graph evidence counts and regions:
+///
+/// this is designed to be useful even when the locus graph is not present
+struct EnhancedEdgeInfo
+{
+
+};
+
+/// reduce the full (very-large) graph down to just the information we need during SVCandidate generation:
+struct ReducedGraphInfo
+{
+    ReducedGraphInfo(const GSCOptions& opt);
+
+    bam_header_info header;
+
+    std::vector<EnhancedEdgeInfo> edges;
+};
+#endif
+
+
+/// for each selected graph edge: find candidate SVs, group them into multi-junction candidates, evaluate them, and optionally write supporting reads to per-sample BAM files
+static
+void
+runGSC(
+    const GSCOptions& opt,
+    const char* progName,
+    const char* progVersion)
+{
+#if 0
+    {
+        // to save memory, load the graph and process/store only the information we need from it:
+    }
+#endif
+
+    EdgeRuntimeTracker edgeTracker(opt.edgeRuntimeFilename);
+    GSCEdgeStatsManager edgeStatMan(opt.edgeStatsFilename);
+
+    const SVLocusScanner readScanner(opt.scanOpt, opt.statsFilename, opt.alignFileOpt.alignmentFilename, opt.isRNA, !opt.isUnstrandedRNA);
+
+    SVFinder svFind(opt, readScanner, edgeTracker,edgeStatMan);
+    MultiJunctionFilter svMJFilter(opt,edgeStatMan);
+    const SVLocusSet& cset(svFind.getSet());
+
+    SVCandidateProcessor svProcessor(opt, readScanner, progName, progVersion, cset, edgeTracker, edgeStatMan);
+
+    std::unique_ptr<EdgeRetriever> edgerPtr(edgeRFactory(cset, opt.edgeOpt)); // takes ownership of the factory's heap allocation
+    EdgeRetriever& edger(*edgerPtr);
+
+    SVCandidateSetData svData;
+    std::vector<SVCandidate> svs;
+    std::vector<SVMultiJunctionCandidate> mjSVs;
+
+    const unsigned sampleSize(opt.alignFileOpt.alignmentFilename.size()); // one sample per input alignment file
+    std::vector<bam_streamer_ptr> origBamStreamPtrs;
+    std::vector<bam_dumper_ptr> supportBamDumperPtrs;
+
+    const bool isGenerateSupportBam(opt.supportBamStub.size() > 0);
+    if (isGenerateSupportBam)
+    {
+        for (unsigned idx(0); idx<sampleSize; ++idx)
+        {
+            std::string alignmentFile(opt.alignFileOpt.alignmentFilename[idx]);
+            bam_streamer_ptr bamStreamPtr(new bam_streamer(alignmentFile.c_str()));
+            origBamStreamPtrs.push_back(bamStreamPtr);
+
+            std::string supportBamName(opt.supportBamStub
+                                       + ".bam_" + std::to_string(idx)
+                                       + ".bam");
+            const bam_hdr_t& header(bamStreamPtr->get_header()); // the support BAM reuses the header of the input alignment file
+            bam_dumper_ptr bamDumperPtr(new bam_dumper(supportBamName.c_str(), header));
+            supportBamDumperPtrs.push_back(bamDumperPtr);
+        }
+    }
+
+    if (opt.isVerbose)
+    {
+        log_os << __FUNCTION__ << ": " << cset.header << "\n";
+    }
+
+    while (edger.next())
+    {
+        const EdgeInfo& edge(edger.getEdge());
+
+        try
+        {
+            edgeTracker.start();
+
+            if (opt.isVerbose)
+            {
+                log_os << __FUNCTION__ << ": starting analysis of edge: ";
+                dumpEdgeInfo(edge,cset,log_os);
+            }
+
+            // find number, type and breakend range (or better: breakend distro) of SVs on this edge:
+            svFind.findCandidateSV(edge, svData, svs);
+
+            // filter long-range junctions outside of the candidate finder so that we can evaluate
+            // junctions which are part of a larger event (like a reciprocal translocation)
+            svMJFilter.filterGroupCandidateSV(edge, svs, mjSVs);
+
+
+            SupportSamples svSupports;
+            svSupports.supportSamples.resize(sampleSize);
+            // run candidate evaluation for this edge (svSupports has one entry per input alignment file):
+            svProcessor.evaluateCandidates(edge, mjSVs, svData, svSupports);
+
+            // write supporting reads into bam files
+            if (isGenerateSupportBam)
+            {
+                for (unsigned idx(0); idx<sampleSize; ++idx)
+                {
+                    writeSupportBam(origBamStreamPtrs[idx],
+                                    svSupports.supportSamples[idx],
+                                    supportBamDumperPtrs[idx]);
+                }
+            }
+        }
+        catch (illumina::common::ExceptionData& e)
+        {
+            std::ostringstream oss;
+            dumpEdgeInfo(edge,cset,oss);
+            e << illumina::common::ExceptionMsg(oss.str());
+            throw; // rethrow the same exception, now carrying the edge description
+        }
+        catch (...)
+        {
+            log_os << "Exception caught while processing graph component: ";
+            dumpEdgeInfo(edge,cset,log_os);
+            throw; // the edge description is only logged here; the exception is rethrown unchanged
+        }
+
+        edgeTracker.stop(edge);
+        if (opt.isVerbose)
+        {
+            log_os << __FUNCTION__ << ": Time to process last edge: ";
+            edgeTracker.getLastEdgeTime().reportSec(log_os);
+            log_os << "\n";
+        }
+
+        edgeStatMan.updateScoredEdgeTime(edge, edgeTracker);
+    }
+}
+
+
+/// parse the command line into a GSCOptions object and run the candidate generation workflow (runGSC) with it
+void
+GenerateSVCandidates::
+runInternal(int argc, char* argv[]) const
+{
+    GSCOptions opt;
+
+    parseGSCOptions(*this,argc,argv,opt);
+#ifdef DEBUG_GSV
+    opt.isVerbose=true; // builds with DEBUG_GSV defined always log verbosely
+#endif
+    runGSC(opt, name(), version());
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/GenerateSVCandidates.hh b/src/c++/lib/applications/GenerateSVCandidates/GenerateSVCandidates.hh
new file mode 100644
index 0000000..05f26e0
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/GenerateSVCandidates.hh
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// generates candidate calls from graph edges
+/// (an illumina::Program; runInternal is defined in GenerateSVCandidates.cpp)
+struct GenerateSVCandidates : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "GenerateSVCandidates"; // runInternal passes this name on to runGSC as progName
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/JunctionCallInfo.hh b/src/c++/lib/applications/GenerateSVCandidates/JunctionCallInfo.hh
new file mode 100644
index 0000000..d0ff1ab
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/JunctionCallInfo.hh
@@ -0,0 +1,91 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include "SVEvidence.hh"
+#include "manta/SVScoreInfo.hh"
+#include "manta/SVCandidate.hh"
+
+
+/// manage all per-junction information consumed by an SV calling model
+///
+/// using this object facilitates multi-breakend event scoring, by clearly
+/// separating out per-junction input info from junction-independent info
+///
+struct JunctionCallInfo
+{
+    JunctionCallInfo() = default; ///< unbound object: null pointers and zero weight until init() is called
+
+    /// bind this object to the inputs of one junction; only the addresses are kept, so the arguments must outlive the getters
+    void
+    init(
+        const SVCandidate& sv,
+        const SVEvidence& evidence,
+        const SVScoreInfo& baseInfo,
+        const float spanningPairWeight)
+    {
+        _sv = &sv;
+        _evidence = &evidence;
+        _baseInfo = &baseInfo;
+        _spanningPairWeight = spanningPairWeight;
+    }
+
+    /// candidate SV passed to init(); asserts that init() was called
+    const SVCandidate&
+    getSV() const
+    {
+        assert(_sv != nullptr);
+        return *_sv;
+    }
+
+    /// evidence passed to init(); asserts that init() was called
+    const SVEvidence&
+    getEvidence() const
+    {
+        assert(_evidence != nullptr);
+        return *_evidence;
+    }
+
+    /// score info passed to init(); asserts that init() was called
+    const SVScoreInfo&
+    getBaseInfo() const
+    {
+        assert(_baseInfo != nullptr);
+        return *_baseInfo;
+    }
+
+    /// spanning pair weight passed to init(), or 0 before init()
+    float
+    getSpanningWeight() const
+    {
+        return _spanningPairWeight;
+    }
+
+private:
+    const SVCandidate* _sv = nullptr;
+    const SVEvidence* _evidence = nullptr;
+    const SVScoreInfo* _baseInfo = nullptr;
+    float _spanningPairWeight = 0;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVCandidateAssemblyRefiner.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateAssemblyRefiner.cpp
new file mode 100644
index 0000000..a89edc0
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateAssemblyRefiner.cpp
@@ -0,0 +1,2194 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+/// \author Chris Saunders
+///
+
+#include "SVCandidateAssemblyRefiner.hh"
+
+#include "alignment/AlignmentUtil.hh"
+#include "blt_util/log.hh"
+#include "blt_util/seq_util.hh"
+#include "blt_util/align_path.hh"
+#include "htsapi/samtools_fasta_util.hh"
+#include "common/Exceptions.hh"
+#include "manta/SVCandidateUtil.hh"
+#include "manta/SVReferenceUtil.hh"
+
+#include <iostream>
+#include <unordered_set>
+
+//#define DEBUG_REFINER
+//#define DEBUG_CONTIG
+//#define DEBUG_KMER
+
+#ifdef DEBUG_REFINER
+#include "blt_util/seq_printer.hh"
+#endif
+
+
+
+/// process assembly/align info into simple reference coordinates that can be reported in the output vcf:
+///
+/// \param[in] isAlign1 if true, this breakend was aligned first by the jump aligner, and therefore left-aligned (if fwd)
+///                     or right-aligned (if rev)
+/// \param[in] jumpRange homologous range across the breakend
+///
+static
+void
+adjustAssembledBreakend(
+    const Alignment& align,
+    const bool isAlign1,
+    const unsigned jumpRange,
+    const reference_contig_segment& ref,
+    const bool isReversed,
+    SVBreakend& bp)
+{
+    // offsets of the two alignment ends; one of them is added to ref.get_offset() below
+    const auto refSize(ref.seq().size());
+    const pos_t alignBeginOffset(getAlignBeginOffset(align, refSize, isReversed));
+    const pos_t alignEndOffset(getAlignEndOffset(align, refSize, isReversed));
+
+    // a right-open breakend is placed on the last aligned base, any other one on the first
+    const bool isBpAtAlignEnd(SVBreakendState::RIGHT_OPEN == bp.state);
+    const pos_t breakendOffset(isBpAtAlignEnd ? (alignEndOffset - 1) : alignBeginOffset);
+    const pos_t breakendPos(ref.get_offset() + breakendOffset);
+
+    // the breakend range always includes the breakend position itself: a left-aligned
+    // breakend is extended by jumpRange bases to the right, a right-aligned one by
+    // jumpRange bases to the left
+    const pos_t homology(static_cast<pos_t>(jumpRange));
+    const bool isLeftAligned(isAlign1 == isBpAtAlignEnd);
+    const pos_t beginPos(isLeftAligned ? breakendPos : (breakendPos - homology));
+    const pos_t endPos((isLeftAligned ? (breakendPos + homology) : breakendPos) + 1);
+
+    // write the result back into the breakend
+    known_pos_range2& range(bp.interval.range);
+    range.set_begin_pos(beginPos);
+    range.set_end_pos(endPos);
+}
+
+
+/// QC one flank of a spanning contig alignment
+///
+/// \param[in] maxQCRefSpan only the first maxQCRefSpan reference bases next to the breakend are examined
+/// \param[in] isLeadingPath if true, input_apath is reversed before it is examined
+/// \param[in] isRNA if true, a shorter minimum sub-alignment length is accepted
+///
+/// \return true if the sub-alignment is too short after clipping or scores too poorly
+static
+bool
+isFilterSpanningAlignment(
+    const unsigned maxQCRefSpan,
+    const GlobalJumpAligner<int>& aligner,
+    const bool isLeadingPath,
+    const bool isRNA,
+    const ALIGNPATH::path_t& input_apath)
+{
+    // minimum length of each contig sub-alignment, even after off-reference clipping (lower for RNA)
+    const unsigned minAlignReadLength(isRNA ? 20 : 30);
+    // minimum fraction of the optimal score required of each contig sub-alignment
+    static const float minScoreFrac(0.75);
+
+    ALIGNPATH::path_t apath(input_apath);
+
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": apath: " << apath << " ; maxRefSpan: " << maxQCRefSpan << "\n";
+#endif
+
+    // orient the working copy so that it always runs forward from the breakend, then
+    // truncate it to the first maxQCRefSpan reference bases covered:
+    if (isLeadingPath)
+    {
+        std::reverse(apath.begin(),apath.end());
+    }
+    apath_limit_ref_length(maxQCRefSpan,apath);
+
+    const unsigned readSize(apath_read_length(apath));
+    const unsigned clipSize(apath_soft_clip_trail_size(apath));
+    assert(clipSize <= readSize);
+
+    const unsigned clippedReadSize(readSize-clipSize);
+    if (clippedReadSize < minAlignReadLength)
+    {
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": Rejecting highest scoring contig sub-alignment. Sub-alignment read length after clipping is: " << clippedReadSize << " min size is: " << minAlignReadLength << "\n";
+#endif
+        return true;
+    }
+
+    // compare the path score with a perfect match over the clipped read length
+    const int pathScore(aligner.getPathScore(apath, false));
+    const int optimalScore(clippedReadSize * aligner.getScores().match);
+    assert(optimalScore>0);
+
+    // negative path scores are treated as zero
+    const int nonClipScore((pathScore < 0) ? 0 : pathScore);
+    const float scoreFrac(static_cast<float>(nonClipScore)/static_cast<float>(optimalScore));
+    if (scoreFrac < minScoreFrac)
+    {
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": Rejecting highest scoring contig sub-alignment. Fraction of optimal alignment score is: " << scoreFrac << " minScoreFrac: " << minScoreFrac << "\n";
+#endif
+        return true;
+    }
+    return false;
+}
+
+
+/// collect every run of consecutive insert/delete path segments that contains at least one
+/// segment of length minSize or more
+/// \param[out] segments cleared, then filled with the inclusive (first,last) apath index pair of each such run, in path order
+static
+void
+getLargeIndelSegments(
+    const ALIGNPATH::path_t& apath,
+    const unsigned minSize,
+    std::vector<std::pair<unsigned,unsigned> >& segments)
+{
+    using namespace ALIGNPATH;
+
+    segments.clear();
+    const unsigned pathSize(apath.size());
+    bool isInRun(false);    // currently inside an indel run
+    bool isLargeRun(false); // the current run has a segment of at least minSize
+    unsigned runStart(0);
+
+    for (unsigned segIndex(0); segIndex<pathSize; ++segIndex)
+    {
+        const path_segment& ps(apath[segIndex]);
+        const bool isIndel((ps.type == INSERT) || (ps.type == DELETE));
+        if (! isIndel)
+        {
+            if (isLargeRun)
+            {
+                assert(segIndex>0);
+                segments.emplace_back(runStart,(segIndex-1));
+            }
+            isInRun=false;
+            isLargeRun=false;
+            continue;
+        }
+
+        if (! isInRun) runStart = segIndex;
+        isInRun=true;
+        if (ps.length>=minSize) isLargeRun=true;
+    }
+
+    // a qualifying run may extend to the end of the path
+    if (isLargeRun)
+    {
+        assert(pathSize>0);
+        segments.emplace_back(runStart,(pathSize-1));
+    }
+}
+
+
+
+#ifdef ITERATIVE_ASSEMBLER
+/// length of the longest single insert or delete segment found within the given apath index ranges
+/// (0 if there is none); each range is an inclusive (first,last) index pair
+static
+unsigned
+getLargestIndelSize(
+    const ALIGNPATH::path_t& apath,
+    const std::vector<std::pair<unsigned,unsigned> >& segments)
+{
+    using namespace ALIGNPATH;
+    unsigned maxIndelLength(0);
+    for (const std::pair<unsigned,unsigned>& range : segments)
+    {
+        for (unsigned segIndex(range.first); segIndex<=range.second; ++segIndex)
+        {
+            const path_segment& ps(apath[segIndex]);
+            const bool isIndel((ps.type == DELETE) || (ps.type == INSERT));
+            if (! isIndel) continue;
+            if (ps.length > maxIndelLength) maxIndelLength = ps.length;
+        }
+    }
+
+    return maxIndelLength;
+}
+#endif
+
+
+/// identify the run of consecutive insert/delete path segments holding the single largest insert segment, if one of at least minSize exists:
+/// \param[out] segments cleared, then given at most one inclusive (first,last) apath index pair; equally large inserts resolve to the later run
+static
+void
+getLargestInsertSegment(
+    const ALIGNPATH::path_t& apath,
+    const unsigned minSize,
+    std::vector<std::pair<unsigned,unsigned> >& segments)
+{
+    using namespace ALIGNPATH;
+
+    bool isInSegment(false); // currently inside a run of insert/delete segments
+    bool isCandidate(false); // the current run contains the largest insert seen so far
+    unsigned segmentStart(0);
+
+    bool isMaxSegment(false); // some insert of at least minSize has been seen
+    unsigned maxSegmentSize(minSize);
+    std::pair<unsigned,unsigned> maxSegment;
+
+    segments.clear();
+
+    const unsigned as(apath.size());
+    for (unsigned i(0); i<as; ++i)
+    {
+        const path_segment& ps(apath[i]);
+
+        if ((ps.type == DELETE) || (ps.type == INSERT))
+        {
+            if (ps.type == INSERT)
+            {
+                if (ps.length>=maxSegmentSize)
+                {
+                    isMaxSegment=true;
+                    maxSegmentSize=ps.length;
+                    isCandidate=true;
+                }
+            }
+            if (! isInSegment) segmentStart = i;
+            isInSegment=true;
+        }
+        else
+        {
+            if (isCandidate)
+            {
+                assert(i>0);
+                maxSegment=std::make_pair(segmentStart,(i-1)); // the run that just ended replaces any earlier best run
+            }
+            isInSegment=false;
+            isCandidate=false;
+        }
+    }
+
+    if (isCandidate) // the best run extends to the end of the path
+    {
+        assert(as>0);
+        maxSegment=std::make_pair(segmentStart,(as-1));
+    }
+
+    if (isMaxSegment)
+    {
+        segments.push_back(maxSegment);
+    }
+}
+
+
+
+/// for a spanning alignment that describes a simple insertion and/or deletion, record the event
+/// as a cigar in sv.insertAlignment; candidates of any other SV type are left untouched
+///
+/// the cigar is computed here even when it is not printed -- that depends on the output file
+/// format and on conventions related to variant size, precision, etc.
+static
+void
+addCigarToSpanningAlignment(
+    SVCandidate& sv)
+{
+    if (getSVType(sv) != SV_TYPE::INDEL) return;
+
+    // the breakends are not necessarily stored in position order, so the distance between
+    // them is taken from whichever begin position comes first
+    const auto bp1Begin(sv.bp1.interval.range.begin_pos());
+    const auto bp2Begin(sv.bp2.interval.range.begin_pos());
+
+    // the deletion covers the positions strictly between the two begin positions
+    const pos_t deleteSize(((bp1Begin <= bp2Begin) ? (bp2Begin - bp1Begin) : (bp1Begin - bp2Begin)) - 1);
+    const pos_t insertSize(sv.insertSeq.size());
+
+    // an indel candidate is expected to insert or delete at least one base
+    assert(deleteSize >= 0);
+    assert(insertSize || deleteSize);
+
+    // follow convention from non-spanning alignments of xIyD in case of complex variant:
+    if (insertSize)
+    {
+        sv.insertAlignment.emplace_back(ALIGNPATH::INSERT,insertSize);
+    }
+    if (deleteSize)
+    {
+        sv.insertAlignment.emplace_back(ALIGNPATH::DELETE,deleteSize);
+    }
+}
+
+
+
+/// QC one flank of a small (single-contig) SV alignment
+///
+/// \param[in] maxQCRefSpan what is the longest flanking sequence length considered for the high quality qc requirement?
+/// \param[in] isLeadingPath if true, apath is reversed before it is examined
+/// \param[in] isComplex if true, the stricter (35 instead of 30) minimum lengths are applied
+/// \param[in,out] apath flanking alignment path; it is modified in place (reversed and/or truncated)
+///
+/// \return true if the flank spans too little reference, is too short after clipping, or scores too poorly
+static
+bool
+isSmallSVSegmentFilter(
+    const unsigned maxQCRefSpan,
+    const AlignerBase<int>& aligner,
+    const bool isLeadingPath,
+    const bool isComplex,
+    ALIGNPATH::path_t& apath)
+{
+    static const unsigned minAlignRefSpanSimple(30); ///< min reference length for alignment
+    static const unsigned minAlignReadLengthSimple(30); ///< min length of alignment after off-reference clipping
+    static const unsigned minAlignRefSpanComplex(35); ///< min reference length for alignment
+    static const unsigned minAlignReadLengthComplex(35); ///< min length of alignment after off-reference clipping
+    static const float minScoreFrac(0.75); ///< min fraction of optimal score in each contig sub-alignment:
+
+    const unsigned minAlignRefSpan(isComplex ? minAlignRefSpanComplex : minAlignRefSpanSimple);
+    const unsigned minAlignReadLength(isComplex ? minAlignReadLengthComplex : minAlignReadLengthSimple);
+
+    // prepare apath by orienting it always going forward from the breakend and limiting the length to
+    // the first maxQCRefSpan ref bases covered:
+    if (isLeadingPath)
+    {
+        std::reverse(apath.begin(),apath.end());
+    }
+
+    apath_limit_ref_length(maxQCRefSpan,apath);
+
+    // the reference span check has to use the reference length of the path; the read length
+    // is checked separately below, after clipping
+    const unsigned refSize(apath_ref_length(apath));
+    if (refSize < minAlignRefSpan)
+    {
+        return true;
+    }
+
+    const unsigned pathSize(apath_read_length(apath));
+    const unsigned clipSize(apath_soft_clip_trail_size(apath));
+
+    assert(clipSize <= pathSize);
+    const unsigned clippedPathSize(pathSize-clipSize);
+    if (clippedPathSize < minAlignReadLength)
+    {
+        return true;
+    }
+
+    const int nonClipScore(std::max(0,aligner.getPathScore(apath, false)));
+    const int optimalScore(clippedPathSize * aligner.getScores().match);
+
+    const float scoreFrac(static_cast<float>(nonClipScore)/static_cast<float>(optimalScore));
+    if (scoreFrac < minScoreFrac)
+    {
+        return true;
+    }
+
+    return false;
+}
+
+
+/// count the windows of targetSeq that match querySeq with a mismatch fraction of at most mismatchRate
+///
+/// every start offset of targetSeq is tested; an 'N' in the query always counts as a mismatch
+/// \return number of matching windows (0 if querySeq is empty or longer than targetSeq)
+static
+int
+searchContig(
+    const std::string& targetSeq,
+    const std::string& querySeq,
+    const float mismatchRate)
+{
+    const unsigned querySize = querySeq.size();
+    const unsigned targetSize = targetSeq.size();
+
+    // an empty query has no defined mismatch fraction (0/0), so report no occurrences explicitly
+    if ((querySize == 0) || (querySize > targetSize)) return 0;
+
+    unsigned numOccur = 0;
+    const unsigned lastStart = targetSize - querySize;
+    for (unsigned start(0); start <= lastStart; ++start)
+    {
+        unsigned mismatches = 0;
+        for (unsigned j(0); j < querySize; ++j)
+        {
+            if ((querySeq[j] != targetSeq[start+j]) || (querySeq[j] == 'N')) ++mismatches;
+        }
+
+        if (float(mismatches)/float(querySize) <= mismatchRate) ++numOccur;
+    }
+
+    return static_cast<int>(numOccur);
+}
+
+
+/// test whether this single-node assembly is (1) an interesting variant above the minimum size and
+/// (2) passes QC otherwise (appropriate flanking regions, etc)
+///
+/// Candidate indel segments are collected with getLargeIndelSegments(). Candidates are then dropped from
+/// the front and from the back of the list for as long as isSmallSVSegmentFilter() returns true for the
+/// alignment path flanking them. Finally the contig sequence on either side of the remaining candidate
+/// region is searched against a window of the reference with searchContig(), and the alignment is
+/// rejected if either flank is found more than once.
+///
+/// \param[in] maxQCRefSpan what is the longest flanking sequence length considered for the high quality qc requirement?
+/// \param[in] aligner forwarded to isSmallSVSegmentFilter()
+/// \param[in] align alignment of the contig to the reference (apath and beginPos are read)
+/// \param[in] contigSeq contig sequence described by \p align
+/// \param[in] refSeq reference sequence, indexed here directly with align.beginPos
+/// \param[in] minCandidateIndelSize minimum indel size forwarded to getLargeIndelSegments()
+/// \param[out] candidateSegments filled by getLargeIndelSegments() and then trimmed; each pair is a
+///             [first,last] inclusive index range into align.apath
+/// \param[in] isLargeOnly if true, keep only segments containing an insertion of at least 80 bases or a
+///            deletion of at least 200 bases
+///
+/// \return true if these segments are candidates
+///
+static
+bool
+isSmallAssemblerSVAlignment(
+    const unsigned maxQCRefSpan,
+    const AlignerBase<int>& aligner,
+    const Alignment& align,
+    const std::string& contigSeq,
+    const std::string& refSeq,
+    const unsigned minCandidateIndelSize,
+    std::vector<std::pair<unsigned,unsigned> >& candidateSegments,
+    const bool isLargeOnly = false)
+{
+    using namespace ALIGNPATH;
+
+    const path_t& apath(align.apath);
+
+    // (1) identify all indels above minimum size:
+    //
+    getLargeIndelSegments(apath, minCandidateIndelSize, candidateSegments);
+
+    // escape if there are no indels above the minimum size
+    if (candidateSegments.empty()) return false;
+
+    // complex is anything besides a simple single indel -- more than one indel or an insert/delete combination:
+    // (note this is evaluated once, before any candidate segments are dropped below)
+    const bool isComplex((candidateSegments.size() > 1) || (candidateSegments[0].first != candidateSegments[0].second));
+
+    // loop through possible leading segments until a clean one is found:
+    //
+    while (true)
+    {
+        // test quality of alignment segments surrounding the variant region:
+        // the leading path is every apath segment before the first candidate
+        const unsigned firstCandIndelSegment(candidateSegments.front().first);
+        path_t leadingPath(apath.begin(), apath.begin()+firstCandIndelSegment);
+
+        static const bool isLeadingPath(true);
+        if (! isSmallSVSegmentFilter(maxQCRefSpan, aligner, isLeadingPath, isComplex, leadingPath))
+        {
+            break;
+        }
+
+        // escape if this was the last segment
+        if (1 == candidateSegments.size()) return false;
+
+        // drop the first candidate and retry with the next one as the leading edge
+        candidateSegments = std::vector<std::pair<unsigned,unsigned> >(candidateSegments.begin()+1,candidateSegments.end());
+    }
+
+    // loop through possible trailing segments until a clean one is found:
+    //
+    while (true)
+    {
+        // test quality of alignment segments surrounding the variant region:
+        // the trailing path is every apath segment after the last candidate
+        const unsigned lastCandIndelSegment(candidateSegments.back().second);
+        path_t trailingPath(apath.begin()+lastCandIndelSegment+1, apath.end());
+
+        static const bool isLeadingPath(false);
+        if (! isSmallSVSegmentFilter(maxQCRefSpan, aligner, isLeadingPath, isComplex, trailingPath))
+        {
+            break;
+        }
+
+        // escape if this was the last segment
+        if (1 == candidateSegments.size()) return false;
+
+        // drop the last candidate and retry with the previous one as the trailing edge
+        candidateSegments.pop_back();
+    }
+
+
+    {
+        // TODO: iterate on all segments
+        // split the contig into the part before the first candidate and the part after the last candidate:
+        const path_t apathTillSvStart(apath.begin(), apath.begin() + candidateSegments.front().first);
+        const path_t apathTillSvEnd(apath.begin(), apath.begin() + candidateSegments.back().second+1);
+
+        const int leftSize = apath_read_length(apathTillSvStart);
+        const int endPos = apath_read_length(apathTillSvEnd);
+        const int rightSize = contigSeq.length() - endPos;
+        const std::string leftContig = contigSeq.substr(0, leftSize);
+        const std::string rightContig = contigSeq.substr(endPos, rightSize);
+
+        const int searchWindow(500);
+        const float mismatchRate(0.05f);
+        const int refAlignStart = align.beginPos;
+        const int refAlignEnd = align.beginPos + apath_ref_length(apath);
+
+        // search for leftContig in the (at most searchWindow) reference bases which end at the alignment end
+        const int leftSearchStart = std::max(0, refAlignEnd-searchWindow);
+        const std::string refSeq4LeftSearch = refSeq.substr(leftSearchStart, (refAlignEnd-leftSearchStart));
+        unsigned occurrences = searchContig(refSeq4LeftSearch, leftContig, mismatchRate);
+
+#ifdef DEBUG_CONTIG
+        log_os << __FUNCTION__ << ": refSeq4LeftSearch: \n" << refSeq4LeftSearch << "\n";
+        log_os << __FUNCTION__ << ": left contig has size " << leftSize << ":\n" << leftContig << "\n";
+        log_os << __FUNCTION__ << ": left contig occurrences " << occurrences << "\n";
+#endif
+        // more than one approximate match of the flank in the window: reject the alignment
+        if (occurrences > 1) return false;
+
+        // search for rightContig in the (at most searchWindow) reference bases which start at the alignment start
+        const int rightSearchSize = std::min(searchWindow, int(refSeq.length()-refAlignStart));
+        const std::string refSeq4RightSearch = refSeq.substr(refAlignStart, rightSearchSize);
+        occurrences = searchContig(refSeq4RightSearch, rightContig, mismatchRate);
+
+#ifdef DEBUG_CONTIG
+        log_os << __FUNCTION__ << ": refSeq4RightSearch: \n" << refSeq4RightSearch << "\n";
+        log_os << __FUNCTION__ << ": right contig has size " << rightSize << ":\n" << rightContig << "\n";
+        log_os << __FUNCTION__ << ": right contig occurrences " << occurrences << "\n";
+#endif
+        if (occurrences > 1) return false;
+    }
+
+    if (isLargeOnly)
+    {
+        // only accept large indels in this case
+        // a candidate is kept if any one of its path segments is a large enough insertion or deletion
+        typedef std::pair<unsigned,unsigned> segment_t;
+        std::vector<segment_t> tmpseg(candidateSegments);
+        candidateSegments.clear();
+        for (const segment_t& segment : tmpseg)
+        {
+            for (unsigned i(segment.first); i<=segment.second; ++i)
+            {
+                if (((apath[i].type == INSERT) && (apath[i].length >= 80)) ||
+                    ((apath[i].type == DELETE) && (apath[i].length >= 200)))
+                {
+                    candidateSegments.push_back(segment);
+                    break;
+                }
+            }
+        }
+    }
+
+    return (! candidateSegments.empty());
+}
+
+
+
+static const unsigned minSemiLargeInsertionLength(40); // if a large insertion is not completely assembled, it must be assembled at least this far into either side
+
+
+/// Test whether the alignment path \p apath looks like one flank of a large insertion: a sufficiently
+/// long and well-scoring aligned stretch followed by a sufficiently long remainder of the contig.
+///
+/// \param[in] aligner provides getMaxPathScore() and the match score used to normalize the path score
+/// \param[in] apath alignment path to evaluate
+/// \param[out] contigOffset contig offset reported by getMaxPathScore()
+/// \param[out] refOffset reference offset reported by getMaxPathScore()
+/// \param[out] score path score reported by getMaxPathScore(), floored at zero
+/// \param[in] trimInsertLength remove extra length from the end of the contig
+/// for the purpose of determining if the "unaligned" end is long enough
+///
+/// \return true if this is a left->right insert candidate
+///
+static
+bool
+isLargeInsertSegment(
+    const AlignerBase<int>& aligner,
+    const ALIGNPATH::path_t& apath,
+    unsigned& contigOffset,
+    unsigned& refOffset,
+    int& score,
+    const unsigned trimInsertLength = 0)
+{
+    using namespace ALIGNPATH;
+
+    static const unsigned minAlignedContigLength(40); ///< min length of aligned portion of contig
+    static const unsigned minUnalignedContigLength(minSemiLargeInsertionLength); ///< min length of unaligned portion of contig
+    static const unsigned minAlignedRefLength(40); ///< min reference length for alignment
+    static const float minOptimalScoreFraction(0.75); ///< min fraction of optimal score in each contig sub-alignment
+
+    const unsigned contigLength(apath_read_length(apath));
+
+    // evaluate the path as given; negative scores are reported as zero
+    const int rawScore(aligner.getMaxPathScore(apath, contigOffset, refOffset));
+    score = std::max(0, rawScore);
+
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": apath " << apath << "\n";
+    log_os << __FUNCTION__ << ": score/ref/contig " << score << " " << refOffset << " " << contigOffset << "\n";
+#endif
+
+    // the aligned portion must be long enough on both the reference and the contig
+    const bool isAlignedPortionLongEnough((refOffset >= minAlignedRefLength) && (contigOffset >= minAlignedContigLength));
+    if (! isAlignedPortionLongEnough) return false;
+
+    // the remainder of the contig (less any requested trim) must be long enough as well
+    assert(contigOffset <= contigLength);
+    const unsigned remainingContigLength(contigLength-contigOffset);
+    if (remainingContigLength < (minUnalignedContigLength+trimInsertLength)) return false;
+
+    // compare the achieved score to that of a perfect match over the aligned portion
+    const int perfectScore(contigOffset * aligner.getScores().match);
+    const float achievedFraction(static_cast<float>(score)/static_cast<float>(perfectScore));
+
+    return (! (achievedFraction < minOptimalScoreFraction));
+}
+
+
+
+/// Test whether the contig alignment looks like the left or the right flank of a large insertion.
+///
+/// The path is evaluated as given first; only if that test fails is the reversed path evaluated.
+///
+/// \param[in] aligner aligner forwarded to isLargeInsertSegment(); taken by const reference so that the
+///            aligner is not copied on every call
+/// \param[in] apath alignment path of the contig
+/// \param[in,out] insertInfo isLeftCandidate is always written, isRightCandidate is only written when the
+///                left test fails. contigOffset, refOffset and score hold the values of the last test
+///                run; for a right candidate the two offsets are converted back so that they are
+///                measured from the start of the (unreversed) path.
+///
+/// \return true if there is a large insert candidate
+///
+static
+bool
+isLargeInsertAlignment(
+    const GlobalAligner<int>& aligner,
+    const ALIGNPATH::path_t& apath,
+    LargeInsertionInfo& insertInfo)
+{
+    using namespace ALIGNPATH;
+
+    insertInfo.isLeftCandidate=isLargeInsertSegment(aligner, apath, insertInfo.contigOffset, insertInfo.refOffset, insertInfo.score);
+
+    if (insertInfo.isLeftCandidate)
+    {
+        return true;
+    }
+
+    // no left candidate: repeat the same test on the reversed path
+    ALIGNPATH::path_t apath_rev(apath);
+    std::reverse(apath_rev.begin(),apath_rev.end());
+
+    insertInfo.isRightCandidate=isLargeInsertSegment(aligner,apath_rev,insertInfo.contigOffset,insertInfo.refOffset, insertInfo.score);
+
+    if (insertInfo.isRightCandidate)
+    {
+        // offsets were measured on the reversed path, flip them back to forward coordinates:
+        const unsigned contigSize(apath_read_length(apath));
+        const unsigned refSize(apath_ref_length(apath));
+        insertInfo.contigOffset=contigSize-insertInfo.contigOffset;
+        insertInfo.refOffset=refSize-insertInfo.refOffset;
+        return true;
+    }
+
+    return false;
+}
+
+
+
+/// Test a completed large-insertion alignment by checking both of its flanks with isLargeInsertSegment().
+///
+/// The left test uses the path from its start through the end of \p insertSegment; the right test uses
+/// the reversed path from the start of \p insertSegment through the end of the path.
+///
+/// \param[in] aligner aligner forwarded to isLargeInsertSegment(); taken by const reference so that the
+///            aligner is not copied on every call
+/// \param[in] apath alignment path of the full contig
+/// \param[in] insertSegment [first,last] inclusive index range of the insertion in \p apath
+/// \param[in] middleSize forwarded as trimInsertLength to isLargeInsertSegment()
+///
+/// \return true if the alignment represents an acceptable complete insertion:
+///
+static
+bool
+isFinishedLargeInsertAlignment(
+    const GlobalAligner<int>& aligner,
+    const ALIGNPATH::path_t& apath,
+    const std::pair<unsigned, unsigned>& insertSegment,
+    const unsigned middleSize)
+{
+    using namespace ALIGNPATH;
+
+    const path_t apath_left(apath.begin(), apath.begin()+insertSegment.second+1);
+
+    // only the two candidate flags are used; the offsets and score written here are discarded
+    LargeInsertionInfo insertInfo;
+    insertInfo.isLeftCandidate=isLargeInsertSegment(aligner, apath_left, insertInfo.contigOffset, insertInfo.
+                                                    refOffset, insertInfo.score, middleSize);
+
+    path_t apath_rev(apath.begin()+insertSegment.first, apath.end());
+    std::reverse(apath_rev.begin(),apath_rev.end());
+
+    insertInfo.isRightCandidate=isLargeInsertSegment(aligner, apath_rev, insertInfo.contigOffset, insertInfo.
+                                                     refOffset, insertInfo.score, middleSize);
+
+    return (insertInfo.isLeftCandidate && insertInfo.isRightCandidate);
+}
+
+
+
+/// get the range over which an alignment element can vary with equal edit distance
+///
+/// \param[in] ref reference sequence
+/// \param[in] refRange range of the event (ie indel) of interest in reference coordinates
+/// \param[in] read read (contig) sequence
+/// \param[in] readRange range of the event (ie indel) of interest in read coordinates
+///
+/// range coordinates are zero indexed and start at the first affected positions (so are not like vcf coordinates)
+/// for instance:
+///  the deletion 10M1D10M would have refRange(10,11), readRange(10,10)
+///  the insertion 10M1I10M would have refRange(10,10), readRange(10,11)
+///
+/// \return range (leftOffset,rightOffset): leftOffset is zero or negative and rightOffset is zero or
+///         positive; they give the number of positions the event can be slid to the left and to the right
+///
+static
+known_pos_range2
+getVariantRange(
+    const std::string& ref,
+    const known_pos_range2& refRange,
+    const std::string& read,
+    const known_pos_range2& readRange)
+{
+#ifdef DEBUG_VARR
+    log_os << __FUNCTION__ << ": refRange " << refRange << "\n";
+    log_os << __FUNCTION__ << ": ref:\n";
+    printSeq(ref, log_os);
+    log_os << "\n";
+    log_os << __FUNCTION__ << ": readRange " << readRange << "\n";
+    log_os << __FUNCTION__ << ": read:\n";
+    printSeq(read, log_os);
+    log_os << "\n";
+#endif
+
+    // check how far we can slide to the right:
+    // (bounded by the amount of sequence remaining after the event in both ref and read)
+    const pos_t maxRightOffset(std::min(ref.size()-refRange.end_pos(), read.size()-readRange.end_pos()));
+    pos_t rightOffset(0);
+    for (; rightOffset<maxRightOffset; ++rightOffset)
+    {
+        const char refSym(ref[refRange.begin_pos()+rightOffset]);
+        const char readSym(read[readRange.begin_pos()+rightOffset]);
+        if (refSym != readSym) break;
+    }
+
+    // check how far we can slide to the left:
+    //
+    // The event cannot be moved further left than the start of either sequence, so minLeftOffset is the
+    // most negative offset which may be returned. Each loop iteration tests whether the move from
+    // leftOffset to (leftOffset-1) is possible, so the loop must stop once leftOffset reaches
+    // minLeftOffset. Testing at minLeftOffset itself would read index -1 of ref or read for a
+    // zero-length range and could return an offset beyond the start of the sequence.
+    const pos_t minLeftOffset(std::max(-refRange.begin_pos(), -readRange.begin_pos()));
+    pos_t leftOffset(0);
+    for (; leftOffset>minLeftOffset; --leftOffset)
+    {
+        const char refSym(ref[refRange.end_pos()+leftOffset-1]);
+        const char readSym(read[readRange.end_pos()+leftOffset-1]);
+        if (refSym != readSym) break;
+    }
+
+#ifdef DEBUG_VARR
+    log_os << __FUNCTION__ << ": left/right offset " << leftOffset << "/" << rightOffset << "\n";
+#endif
+
+    return known_pos_range2(leftOffset,rightOffset);
+}
+
+
+
+/// process smallSV alignment section into a usable sv candidate
+///
+/// \param[in] ref reference segment; ref.seq() is compared to the contig and ref.get_offset() is added
+///            to the breakend positions
+/// \param[in] contig contig sequence
+/// \param[in] align alignment of \p contig
+/// \param[in] segRange [first,last] inclusive index range of the variant segments in align.apath
+/// \param[in,out] sv candidate updated in place: it is set precise and its breakend states, breakend
+///                ranges, bp2 tid, insertSeq and insertAlignment are overwritten
+///
+/// Throws a LogicException if getVariantRange() reports a non-zero begin offset for the variant.
+///
+static
+void
+setSmallCandSV(
+    const reference_contig_segment& ref,
+    const std::string& contig,
+    const Alignment& align,
+    const std::pair<unsigned,unsigned>& segRange,
+    SVCandidate& sv)
+{
+#ifdef DEBUG_VARR
+    log_os << __FUNCTION__ << ": align " << align << "\n";
+    log_os << __FUNCTION__ << ": segRange [" << segRange.first << "," << segRange.second << "]\n";
+    log_os << __FUNCTION__ << ": inputSV " << sv << "\n";
+#endif
+    sv.setPrecise();
+
+    // get readRange and refRange, which are translations of segRange into
+    // read and reference offsets:
+    known_pos_range2 readRange;
+    known_pos_range2 refRange;
+    {
+        using namespace ALIGNPATH;
+
+        pos_t readPos(0);
+        pos_t refPos(align.beginPos);
+
+        const path_t& apath(align.apath);
+        const unsigned as(apath.size());
+        for (unsigned i(0); i<as; ++i)
+        {
+            const path_segment& ps(apath[i]);
+            // begin positions are recorded before segment i is consumed...
+            if (i == segRange.first)
+            {
+                refRange.set_begin_pos(refPos);
+                readRange.set_begin_pos(readPos);
+            }
+
+            if (is_segment_type_ref_length(ps.type)) refPos += ps.length;
+            if (is_segment_type_read_length(ps.type)) readPos += ps.length;
+
+            // ...and end positions after it is consumed, so the ranges cover segments [first,second] inclusive
+            if (i == segRange.second)
+            {
+                refRange.set_end_pos(refPos);
+                readRange.set_end_pos(readPos);
+            }
+        }
+    }
+
+    // by how many positions can the alignment position vary with the same alignment score?:
+    const known_pos_range2 cipos(getVariantRange(ref.seq(),refRange, contig, readRange));
+
+    // cipos for a precise variant is expected to start from 0 and extend forward zero to many bases
+    if (cipos.begin_pos() != 0)
+    {
+        using namespace illumina::common;
+
+        std::ostringstream oss;
+        oss << "ERROR: Attempting to convert alignment to sv candidate."
+            << " contigSize: " << contig.size()
+            << " alignment: " << align
+            << " segments: [" << segRange.first << "," << segRange.second << "]\n"
+            << "\treadRange: " << readRange << "\n"
+            << "\trefRange: " << refRange << "\n"
+            << "\tcipos: " << cipos << "\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    // bp1 starts one position before the first affected reference position; both breakend ranges
+    // are widened by the right-slide distance (cipos.end_pos())
+    sv.bp1.state = SVBreakendState::RIGHT_OPEN;
+    const pos_t beginPos(ref.get_offset()+refRange.begin_pos()-1);
+    sv.bp1.interval.range.set_range(beginPos,beginPos+cipos.end_pos()+1);
+
+    sv.bp2.state = SVBreakendState::LEFT_OPEN;
+    const pos_t endPos(ref.get_offset()+refRange.end_pos());
+    sv.bp2.interval.range.set_range(endPos,endPos+cipos.end_pos()+1);
+    sv.bp2.interval.tid = sv.bp1.interval.tid;
+
+    // the inserted sequence is the part of the contig covered by the variant segments
+    sv.insertSeq = contig.substr(readRange.begin_pos(),readRange.size());
+
+    // add CIGAR for all indels:
+    sv.insertAlignment = ALIGNPATH::path_t(align.apath.begin()+segRange.first, align.apath.begin()+segRange.second+1);
+}
+
+
+
+/// Translate a range of alignment path segments into the read (contig) offsets which they span.
+///
+/// \param[in] apath alignment path
+/// \param[in] segRange [first,last] inclusive index range of segments in \p apath
+///
+/// \return range starting at the read offset before segment segRange.first and ending at the read
+///         offset after segment segRange.second
+///
+static
+known_pos_range2
+getInsertTrim(
+    const ALIGNPATH::path_t& apath,
+    const std::pair<unsigned,unsigned>& segRange)
+{
+    assert(segRange.first <= segRange.second);
+
+    using namespace ALIGNPATH;
+
+    known_pos_range2 trimRange;
+
+    // running read offset at the start of the current segment
+    pos_t readOffset(0);
+
+    unsigned segmentIndex(0);
+    for (const path_segment& segment : apath)
+    {
+        if (segmentIndex == segRange.first)
+        {
+            trimRange.set_begin_pos(readOffset);
+        }
+
+        if (is_segment_type_read_length(segment.type))
+        {
+            readOffset += segment.length;
+        }
+
+        if (segmentIndex == segRange.second)
+        {
+            // the last requested segment has been consumed, so the range is complete
+            trimRange.set_end_pos(readOffset);
+            return trimRange;
+        }
+
+        ++segmentIndex;
+    }
+
+    assert(false && "segRange not found");
+    return trimRange;
+}
+
+
+
+/// search for combinations of left and right-side insertion candidates to find a good insertion pair
+///
+/// If a pair is found, a "fake" contig (left contig + run of N + right contig) is appended to
+/// \p assemblyData, aligned to the bp1 reference and, if it passes QC, turned into a new SV candidate
+/// of unknown insertion size which is appended to assemblyData.svs.
+///
+/// \param[in] sv candidate which is copied as the starting point for the new SV
+/// \param[in] leadingCut number of bases skipped at the start of the bp1 reference for the alignment
+/// \param[in] trailingCut number of bases skipped at the end of the bp1 reference for the alignment
+/// \param[in] largeInsertCompleteAligner aligner used for the fake contig and for its QC
+/// \param[in] largeInsertionCandidateIndex contig indices of the left/right insertion candidates
+/// \param[in] excludedPos breakend start positions for which no new SV is added
+/// \param[in,out] assemblyData contigs, smallSVAlignments, smallSVSegments and extendedContigs are
+///                extended by one entry as soon as a candidate pair is found (even when the pair
+///                later fails QC); svs is extended only if all QC steps pass
+///
+static
+void
+processLargeInsertion(
+    const SVCandidate& sv,
+    const pos_t leadingCut,
+    const pos_t trailingCut,
+    const GlobalAligner<int>& largeInsertCompleteAligner,
+    const std::vector<unsigned>& largeInsertionCandidateIndex,
+    const std::set<pos_t>& excludedPos,
+    SVCandidateAssemblyData& assemblyData)
+{
+    if (largeInsertionCandidateIndex.empty()) return;
+
+#ifdef DEBUG_REFINER
+    static const std::string logtag("processLargeInsertion: ");
+    log_os << logtag << "starting large insertion search\n";
+#endif
+
+    bool isLargeInsertionPair(false);
+    unsigned largeInsertionLeftIndex(0);
+    unsigned largeInsertionRightIndex(0);
+    int bestBreakDist(0);
+    int bestBreakScore(0);
+
+    // try to pair up a large insertion candidate
+    //
+    // just do a dumb, all against all evaluation for now, if there's more than one left-right candidate set,
+    // resolve according to (1) min ref distance and (2) best combined score
+    static const int maxBreakDist(35);
+
+    const unsigned candCount(largeInsertionCandidateIndex.size());
+    for (unsigned candCount1(0); (candCount1+1)<candCount; ++candCount1)
+    {
+        const unsigned candIndex1(largeInsertionCandidateIndex[candCount1]);
+        const Alignment& align1(assemblyData.smallSVAlignments[candIndex1].align);
+        const LargeInsertionInfo& insert1(assemblyData.largeInsertInfo[candIndex1]);
+        for (unsigned candCount2(candCount1+1); candCount2<candCount; ++candCount2)
+        {
+            const unsigned candIndex2(largeInsertionCandidateIndex[candCount2]);
+            const Alignment& align2(assemblyData.smallSVAlignments[candIndex2].align);
+            const LargeInsertionInfo& insert2(assemblyData.largeInsertInfo[candIndex2]);
+
+            // a usable pair needs one left and one right candidate
+            if (! ((insert1.isLeftCandidate && insert2.isRightCandidate) ||
+                   (insert2.isLeftCandidate && insert1.isRightCandidate))) continue;
+
+            // distance between the two candidate breakpoints in reference coordinates
+            const int breakDist(std::abs((long int)(align1.beginPos+insert1.refOffset)-(long int)(align2.beginPos+insert2.refOffset)));
+
+            if (breakDist > maxBreakDist) continue;
+
+            const int breakScore(insert1.score+insert2.score);
+
+            // lexicographic comparison: a smaller reference distance always wins, and the combined
+            // alignment score (higher is better) only breaks ties between equal distances
+            const bool isBetterPair(
+                (breakDist<bestBreakDist) ||
+                ((breakDist==bestBreakDist) && (breakScore>bestBreakScore)));
+
+            if ( (! isLargeInsertionPair) || isBetterPair)
+            {
+                /// set new large insertion candidate:
+                isLargeInsertionPair=true;
+                largeInsertionLeftIndex=candIndex1;
+                largeInsertionRightIndex=candIndex2;
+                if (insert1.isRightCandidate)
+                {
+                    std::swap(largeInsertionLeftIndex,largeInsertionRightIndex);
+                }
+                bestBreakDist=breakDist;
+                bestBreakScore=breakScore;
+
+#ifdef DEBUG_REFINER
+                log_os << logtag << "setting new large insertion candidate. Score: " << breakScore << "\n";
+#endif
+            }
+        }
+    }
+
+    // no large insertion found:
+    if (! isLargeInsertionPair) return;
+
+    // found large insertion, insert this into data structures for downstream scoring/reporting:
+    {
+        const std::string& align1RefStr(assemblyData.bp1ref.seq());
+        const unsigned contigCount(assemblyData.contigs.size());
+
+        // placeholder for the unassembled middle of the insertion:
+        static const std::string middle("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN");
+        const unsigned middleSize(middle.size());
+
+        // add one new entry at index contigCount for the fake contig and its alignment data:
+        assemblyData.contigs.resize(contigCount+1);
+        assemblyData.smallSVAlignments.resize(contigCount+1);
+        assemblyData.smallSVSegments.resize(contigCount+1);
+        assemblyData.extendedContigs.resize(contigCount+1);
+
+        AssembledContig& fakeContig(assemblyData.contigs[contigCount]);
+        SVCandidateAssemblyData::SmallAlignmentResultType& fakeAlignment(assemblyData.smallSVAlignments[contigCount]);
+        std::vector<std::pair<unsigned,unsigned> >& fakeSegments(assemblyData.smallSVSegments[contigCount]);
+        std::string& fakeExtendedContig(assemblyData.extendedContigs[contigCount]);
+
+        // these references are taken after the resize above, so they refer to the resized container
+        const AssembledContig& leftContig(assemblyData.contigs[largeInsertionLeftIndex]);
+        const AssembledContig& rightContig(assemblyData.contigs[largeInsertionRightIndex]);
+
+        // fake contig sequence is: left contig + middle + right contig
+        fakeContig=leftContig;
+        fakeContig.seq += (middle + rightContig.seq);
+
+        const AssembledContig& constFakeContig(fakeContig);
+
+        largeInsertCompleteAligner.align(
+            constFakeContig.seq.begin(), constFakeContig.seq.end(),
+            align1RefStr.begin() + leadingCut, align1RefStr.end() - trailingCut,
+            fakeAlignment);
+
+        // the alignment was made against the trimmed reference, shift it back to untrimmed coordinates:
+        fakeAlignment.align.beginPos += leadingCut;
+
+#ifdef DEBUG_REFINER
+        log_os << logtag << "large insertion fake alignment: " << fakeAlignment << "\n";
+#endif
+
+        fakeSegments.clear();
+        getLargestInsertSegment(fakeAlignment.align.apath, middleSize, fakeSegments);
+
+        // QC segments
+        if ((1 != fakeSegments.size()) || (fakeSegments[0].second < fakeSegments[0].first))
+        {
+            return;
+        }
+
+        // QC the resulting alignment:
+        if (! isFinishedLargeInsertAlignment(largeInsertCompleteAligner,fakeAlignment.align.apath, fakeSegments[0], middleSize))
+        {
+            return;
+        }
+
+        // final prep step: check left and right partial insert sequences -- this is a last chance to QC for anomalies and get out:
+        //
+        const known_pos_range2 insertTrim(getInsertTrim(fakeAlignment.align.apath,fakeSegments[0]));
+        {
+            // the insertion must start at least minFlankSize bases before the end of the left contig...
+            static const int minFlankSize(minSemiLargeInsertionLength);
+            if ((insertTrim.begin_pos()+minFlankSize) > static_cast<pos_t>(leftContig.seq.size()))
+            {
+                return;
+            }
+
+            // ...and end at least minFlankSize bases after the start of the right contig
+            const pos_t rightOffset(leftContig.seq.size()+middle.size());
+            if ((rightOffset+minFlankSize) > insertTrim.end_pos())
+            {
+                return;
+            }
+        }
+
+#ifdef DEBUG_REFINER
+        log_os << logtag << "large insertion passed QC\n";
+#endif
+
+        getExtendedContig(fakeAlignment, fakeContig.seq, align1RefStr, fakeExtendedContig);
+
+        /// this section mostly imitates the regular SV build below, now that we've constructed our fake contig/alignment
+        SVCandidate newSV(sv);
+        newSV.assemblyAlignIndex = contigCount;
+        newSV.assemblySegmentIndex = 0;
+        setSmallCandSV(assemblyData.bp1ref, fakeContig.seq, fakeAlignment.align, fakeSegments[0], newSV);
+
+        /// check if this matches a fully assembled insertion:
+        const pos_t startPos(newSV.bp1.interval.range.begin_pos());
+        if (excludedPos.count(startPos)) return;
+
+        newSV.isUnknownSizeInsertion = true;
+
+        // final step: get left and right partial insert sequences:
+        //
+        assert(insertTrim.begin_pos() < static_cast<pos_t>(leftContig.seq.size()));
+        newSV.unknownSizeInsertionLeftSeq = leftContig.seq.substr(insertTrim.begin_pos());
+
+        const pos_t rightOffset(leftContig.seq.size()+middle.size());
+        assert(rightOffset < insertTrim.end_pos());
+
+        newSV.unknownSizeInsertionRightSeq = rightContig.seq.substr(0,(insertTrim.end_pos()-rightOffset));
+
+        assemblyData.svs.push_back(newSV);
+    }
+}
+
+
+
+/// Construct the refiner from the program options.
+///
+/// Besides recording \p opt and \p header, this builds the small-SV and spanning assemblers and one
+/// aligner per alignment task from the corresponding score settings in opt.refineOpt.
+///
+/// \param[in] opt program options
+/// \param[in] header bam header information, also passed to both assemblers
+/// \param[in] counts passed to both assemblers
+/// \param[in] edgeTracker its remoteTime member is passed to both assemblers
+///
+SVCandidateAssemblyRefiner::
+SVCandidateAssemblyRefiner(
+    const GSCOptions& opt,
+    const bam_header_info& header,
+    const AllCounts& counts,
+    EdgeRuntimeTracker& edgeTracker) :
+    _opt(opt),
+    _header(header),
+    _smallSVAssembler(opt.scanOpt, opt.refineOpt.smallSVAssembleOpt, opt.alignFileOpt,
+                      opt.statsFilename, opt.chromDepthFilename, header, counts, opt.isRNA, edgeTracker.remoteTime),
+    // the spanning assembler uses the RNA-specific assembly options when opt.isRNA is set
+    _spanningAssembler(opt.scanOpt,
+                       !opt.isRNA ? opt.refineOpt.spanningAssembleOpt : opt.refineOpt.RNAspanningAssembleOpt,
+                       opt.alignFileOpt, opt.statsFilename, opt.chromDepthFilename, header, counts, opt.isRNA, edgeTracker.remoteTime),
+    _smallSVAligner(opt.refineOpt.smallSVAlignScores),
+    _largeSVAligner(opt.refineOpt.largeSVAlignScores,opt.refineOpt.largeGapOpenScore),
+    _largeInsertEdgeAligner(opt.refineOpt.largeInsertEdgeAlignScores),
+    _largeInsertCompleteAligner(opt.refineOpt.largeInsertCompleteAlignScores),
+    _spanningAligner(opt.refineOpt.spanningAlignScores, opt.refineOpt.jumpScore),
+    _RNASpanningAligner(
+        opt.refineOpt.RNAspanningAlignScores,
+        opt.refineOpt.RNAJumpScore,
+        opt.refineOpt.RNAIntronOpenScore,
+        opt.refineOpt.RNAIntronOffEdgeScore)
+{}
+
+
+
+/// Clear \p assemblyData and run the assembly routine matching the candidate type: getJumpAssembly()
+/// for spanning candidates, getSmallSVAssembly() for complex candidates.
+///
+/// \param[in] sv candidate to assemble
+/// \param[in] isRNA forwarded to getJumpAssembly()
+/// \param[in] isFindLargeInsertions forwarded to the selected assembly routine
+/// \param[out] assemblyData cleared on entry, then filled by the selected assembly routine
+///
+void
+SVCandidateAssemblyRefiner::
+getCandidateAssemblyData(
+    const SVCandidate& sv,
+    const SVCandidateSetData& /*svData*/,
+    const bool isRNA,
+    const bool isFindLargeInsertions,
+    SVCandidateAssemblyData& assemblyData) const
+{
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": START sv " << sv;
+#endif
+
+    assemblyData.clear();
+
+    // dispatch on the assembly category of the candidate:
+    //
+    if (isSpanningSV(sv))
+    {
+        // two suspected breakends with a direction to each, most common large scale SV case.
+        // the spanning status of the original low-resolution candidate is recorded first:
+        assemblyData.isCandidateSpanning=true;
+        getJumpAssembly(sv, isRNA, isFindLargeInsertions, assemblyData);
+        return;
+    }
+
+    if (isComplexSV(sv))
+    {
+        // single-interval local assembly, this is the most common case for small-scale SVs/indels.
+        // the spanning status of the original low-resolution candidate is recorded first:
+        assemblyData.isCandidateSpanning=false;
+        getSmallSVAssembly(sv, isFindLargeInsertions, assemblyData);
+        return;
+    }
+
+    // neither spanning nor complex:
+    log_os << "Unknown candidate SV: " << sv << "\n";
+    assert(false && "Unknown candidate SV type");
+}
+
+
+
+/// Represents a stretch of reference sequence excluded from alignment by the kmer matcher.
+struct exclusion_block
+{
+    /// \param[in] s start of the excluded region
+    /// \param[in] l number of bp excluded
+    /// \param[in] sp number of 'N' added in place of the excluded sequence
+    exclusion_block(const unsigned s, const unsigned l, const unsigned sp)
+        : start(s), length(l), nSpacer(sp) {}
+    unsigned start; // Start of excluded region
+    unsigned length; // Number of bp excluded
+    unsigned nSpacer; // Number of 'N' added in place of excluded sequence
+};
+
+
+
+/// Translate a reduced reference position to the original reference coordinates
+///
+/// \param[in] exclBlocks excluded regions, visited in container order
+/// \param[in] maskedPos position in the reduced reference
+///
+/// \return \p maskedPos plus the net length (excluded bases minus spacer bases) of every block visited
+///         before the first block which starts beyond the translated position
+///
+static
+unsigned
+translateMaskedPos(
+    const std::vector<exclusion_block>& exclBlocks,
+    const unsigned maskedPos)
+{
+    // net number of reference bases removed by the blocks passed so far
+    int shift(0);
+    for (const exclusion_block& block : exclBlocks)
+    {
+        const bool isBlockPastPos(block.start > (shift + maskedPos));
+        if (isBlockPastPos) break;
+
+        shift += block.length - block.nSpacer;
+    }
+    return shift + maskedPos;
+}
+
+
+
+/// Translate an alignment made against a reduced reference to the original reference coordinates
+///
+/// \param[in,out] align alignment to translate; beginPos and apath are only rewritten when the
+///                translation succeeds and the path is not empty
+/// \param[in] exclBlocks excluded regions of the reduced reference
+///
+/// \return false if an align-match segment changes length under translation (\p align is left
+///         unchanged in this case), true otherwise
+///
+static
+bool
+translateMaskedAlignment(
+    Alignment& align,
+    const std::vector<exclusion_block>& exclBlocks)
+{
+    using namespace ALIGNPATH;
+#ifdef DEBUG_KMER
+    log_os << __FUNCTION__ << " original: " << align << "\n";
+#endif
+    path_t translatedPath;
+
+    // current position in the reduced reference
+    pos_t maskedRefPos(align.beginPos);
+    for (const path_segment& seg : align.apath)
+    {
+        // segments without reference length are copied over unchanged
+        if (! is_segment_type_ref_length(seg.type))
+        {
+            translatedPath.push_back(seg);
+            continue;
+        }
+
+        // the translated segment length is the distance between its translated end points
+        const unsigned translatedEnd(translateMaskedPos(exclBlocks, maskedRefPos + seg.length));
+        const unsigned translatedBegin(translateMaskedPos(exclBlocks, maskedRefPos));
+        const unsigned length(translatedEnd - translatedBegin);
+
+        const bool isLengthChanged(length != seg.length);
+        if (is_segment_align_match(seg.type) && isLengthChanged) return false;
+#ifdef DEBUG_KMER
+        log_os << __FUNCTION__ << " SEGMENT " << seg.type << " " << seg.length << "\n";
+        log_os << "\tlength " << length << "\n";
+        log_os << "\tcpos " << maskedRefPos << "\n";
+#endif
+        maskedRefPos += seg.length;
+        translatedPath.emplace_back(seg.type, length);
+    }
+
+    if (! align.apath.empty())
+    {
+        align.beginPos = translateMaskedPos(exclBlocks, align.beginPos);
+        align.apath = translatedPath;
+    }
+#ifdef DEBUG_KMER
+    log_os << __FUNCTION__ << " final: " << align << "\n";
+#endif
+    return true;
+}
+
+
+namespace
+{
+
+/// Returns a reduced reference sequence where long stretches without kmer matches to the contig are removed
+///
+/// \param[in] refSeqStart start of the reference sequence (random access iterator)
+/// \param[in] refSeqEnd end of the reference sequence
+/// \param[in] contig contig sequence whose kmers are searched for in the reference
+/// \param[in] nSpacer number of 'N' inserted in place of each removed stretch which does not start at
+///            the beginning of the reference
+/// \param[out] exclBlocks one entry is appended for each removed stretch
+///
+/// \return the reduced reference; if nothing is retained (including the case where the contig or the
+///         reference is shorter than one kmer) this is a run of \p nSpacer 'N'
+///
+template <typename SymIter>
+std::string kmerMaskReference(
+    const SymIter refSeqStart,
+    const SymIter refSeqEnd,
+    const std::string& contig,
+    const int nSpacer,
+    std::vector<exclusion_block>& exclBlocks)
+{
+    static const int merSize(10);
+
+    // A sequence shorter than merSize contains no kmer, so there can be no kmer hit. Handle this up
+    // front: the unsigned expression (contig.size() - (merSize - 1)) would otherwise wrap around for a
+    // short contig, and (refSeqEnd - (merSize - 1)) would point before refSeqStart for a short reference.
+    // The result is the same as for any other input without a kmer hit.
+    const bool isContigTooShort(contig.size() < static_cast<std::string::size_type>(merSize));
+    const bool isRefTooShort((refSeqEnd - refSeqStart) < merSize);
+    if (isContigTooShort || isRefTooShort)
+    {
+        std::string spacerOnly;
+        spacerOnly.append(nSpacer, 'N');
+        return spacerOnly;
+    }
+
+    // Hash all kmers in the contig
+    std::unordered_set<std::string> contigHash;
+    for (unsigned contigMerIndex(0); contigMerIndex<(contig.size() - (merSize - 1)); ++contigMerIndex)
+    {
+        contigHash.insert(contig.substr(contigMerIndex,merSize));
+    }
+    // Mask the reference (and keep track of excluded regions for coordinate translation later)
+    static const int minExclusion(1000);
+    static const int padding(50); // Amount of sequence included around each kmer hit.
+    std::string maskedRef;
+    const SymIter maxRef = refSeqEnd - (merSize - 1);
+    SymIter potExclStart = refSeqStart; // start of the stretch which may still be excluded
+    SymIter inclStart = refSeqStart; // start of the stretch which will be included next
+    for (SymIter refIt = refSeqStart; refIt != maxRef; refIt++)
+    {
+        if (contigHash.count(std::string(refIt, refIt + merSize)) != 0)
+        {
+            // kmer hit: exclude the stretch since the previous hit if it is long enough
+            if ((refIt - potExclStart) > (minExclusion + padding))
+            {
+                unsigned spacer(0);
+                if (potExclStart > refSeqStart)
+                {
+                    maskedRef.append(std::string(inclStart, potExclStart));
+                    maskedRef.append(nSpacer, 'N');
+                    spacer = nSpacer;
+                }
+                inclStart = refIt - padding;
+                exclBlocks.emplace_back(potExclStart - refSeqStart, inclStart - potExclStart, spacer);
+            }
+            potExclStart = refIt + padding;
+        }
+    }
+    maskedRef.append(std::string(inclStart, std::min(maxRef, potExclStart)));
+#ifdef DEBUG_KMER
+    log_os << __FUNCTION__ << "Reduced to " << maskedRef << '\n';
+    log_os << __FUNCTION__ << " exclBlocks\n\t";
+    for (const auto& block : exclBlocks)
+        log_os << " " << block.start << ":" << block.length << ":" << block.nSpacer;
+    log_os << "\n";
+#endif
+    if (maskedRef.empty()) maskedRef.append(nSpacer, 'N');
+    return maskedRef;
+}
+
+}
+
+
+
+/// convert jump alignment results into an SVCandidate
+///
+/// \param[in] bporient breakend orientation flags (isBp2AlignedFirst, isBp1Reversed, isBp2Reversed)
+/// \param[in] assemblyData source of the spanning alignment and of the two breakend references
+/// \param[in] contigIndex index of the contig/alignment within \p assemblyData
+/// \param[in,out] sv candidate to refine: assembly indices are set, it is marked precise and both
+///                breakends are passed to adjustAssembledBreakend()
+///
+static
+void
+generateRefinedSVCandidateFromJumpAlignment(
+    const BPOrientation& bporient,
+    const SVCandidateAssemblyData& assemblyData,
+    const unsigned contigIndex,
+    SVCandidate& sv)
+{
+    const SVCandidateAssemblyData::JumpAlignmentResultType& align(assemblyData.spanningAlignments[contigIndex]);
+
+    // first get each alignment associated with the correct breakend:
+    // align1 belongs to bp1 unless bp2 was aligned first
+    const bool isBp1AlignedFirst(! bporient.isBp2AlignedFirst);
+    const Alignment& bp1Align(isBp1AlignedFirst ? align.align1 : align.align2);
+    const Alignment& bp2Align(isBp1AlignedFirst ? align.align2 : align.align1);
+
+    // summarize usable output information in a second SVBreakend
+    // object -- this is the 'refined' sv:
+    sv.assemblyAlignIndex = contigIndex;
+    sv.assemblySegmentIndex = 0;
+
+    sv.setPrecise();
+
+    adjustAssembledBreakend(bp1Align, isBp1AlignedFirst, align.jumpRange, assemblyData.bp1ref, bporient.isBp1Reversed, sv.bp1);
+    adjustAssembledBreakend(bp2Align, (bporient.isBp2AlignedFirst), align.jumpRange, assemblyData.bp2ref, bporient.isBp2Reversed, sv.bp2);
+}
+
+
+
+/// convert jump alignment results into an SVCandidate and add all
+/// extra data required for VCF output
+///
+/// \param[in] bporient breakend orientation flags
+/// \param[in] assemblyData source of the contig and of its spanning alignment
+/// \param[in] contigIndex index of the contig/alignment within \p assemblyData
+/// \param[in,out] sv candidate to refine; in addition to the breakend refinement, insertSeq is
+///                rebuilt and addCigarToSpanningAlignment() is applied
+///
+static
+void
+generateRefinedVCFSVCandidateFromJumpAlignment(
+    const BPOrientation& bporient,
+    const SVCandidateAssemblyData& assemblyData,
+    const unsigned contigIndex,
+    SVCandidate& sv)
+{
+    // breakend refinement shared with the non-VCF version:
+    generateRefinedSVCandidateFromJumpAlignment(bporient, assemblyData, contigIndex, sv);
+
+    const SVCandidateAssemblyData::JumpAlignmentResultType& jumpAlign(assemblyData.spanningAlignments[contigIndex]);
+    const AssembledContig& assembledContig(assemblyData.contigs[contigIndex]);
+
+    // fill in insertSeq:
+    // it stays empty unless the alignment has a positive jump insert size
+    sv.insertSeq.clear();
+    const bool isJumpInsert(jumpAlign.jumpInsertSize > 0);
+    if (isJumpInsert)
+    {
+        getFwdStrandInsertSegment(jumpAlign, assembledContig.seq, bporient.isBp1Reversed, sv.insertSeq);
+    }
+
+    // add CIGAR for any simple (insert/delete) cases:
+    addCigarToSpanningAlignment(sv);
+}
+
+
+
+/// Assemble contigs spanning the two breakends of a candidate SV,
+/// jump-align each contig to the reference sequence around both
+/// breakends, and record the best QC-passing alignment as a refined SV
+///
+/// If both breakends are on the same chromosome with opposite
+/// orientation, the SV type is INDEL or COMPLEX, and the expanded
+/// breakend reference regions overlap, the candidate is instead
+/// converted to a single-region form and handed to getSmallSVAssembly()
+///
+/// sv: input candidate, not modified (copies are made where needed)
+/// isRNA: selects the larger reference edge extension and adds the
+///     spliced alignment length to the QC spans
+/// isFindLargeInsertions: only forwarded to getSmallSVAssembly()
+/// assemblyData: output -- receives the breakend orientation flags,
+///     reference segments, contigs, per-contig alignments and extended
+///     contigs. On success bestAlignmentIndex is set and the refined sv
+///     is appended to assemblyData.svs
+///
+/// The function returns without setting bestAlignmentIndex when a
+/// breakend reference region is invalid, when no contig alignment
+/// passes the minimum span check, or when the highest scoring alignment
+/// is filtered by the subsequent QC step.
+///
+void
+SVCandidateAssemblyRefiner::
+getJumpAssembly(
+    const SVCandidate& sv,
+    const bool isRNA,
+    const bool isFindLargeInsertions,
+    SVCandidateAssemblyData& assemblyData) const
+{
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": START\n";
+    if (isRNA)
+    {
+        log_os << __FUNCTION__ << ": RNA\n";
+    }
+#endif
+
+    // This determines by how much we extend the reference sequence
+    // around the breakend region for all discovery and scoring
+    // operations. It is possible to discover breakends and small
+    // indels in this expanded region.
+    //
+    const pos_t extraRefEdgeSize(isRNA ? 25000 : 250);
+
+    // This determines by how much we extend the reference sequence
+    // around the breakend region for all operations except alignment
+    // of the contig back to the reference.
+    //
+    // The primary motivation for this value is to improve our ability
+    // to find reads which support the breakend when quality scoring
+    // takes place (a subsequent step outside of this function), but
+    // without expanding the regions where a breakend can possibly be
+    // found (ie. without the risk of additional false positives)
+    //
+    // The extra reference sequence is used for:
+    // - Extraction of breakend associated reads (reference sequence
+    // used to find reads which poorly match the reference in this
+    // case)
+    // - Contig extension after alignment, this means that the
+    // extended region will be used for read support scoring (later
+    // during Q-value generation)
+    //
+    // The extra reference sequence is removed for:
+    // - contig alignment, this means that actual breakpoint discovery
+    //   will not occur in the extended region
+    //
+    const pos_t extraRefSplitSize(100);
+
+    const pos_t extraRefSize(extraRefEdgeSize+extraRefSplitSize);
+
+    // if the breakends have a simple insert/delete orientation and
+    // the alignment regions overlap, then handle this case as a local
+    // assembly problem:
+    if (sv.bp1.interval.tid == sv.bp2.interval.tid)
+    {
+        if (! SVBreakendState::isSameOrientation(sv.bp1.state,sv.bp2.state))
+        {
+            const SV_TYPE::index_t svType(getSVType(sv));
+            if ((svType == SV_TYPE::INDEL) || (svType == SV_TYPE::COMPLEX))
+            {
+                if ( isRefRegionOverlap( _header, extraRefSize, sv) )
+                {
+                    // transform SV into a single region format:
+                    // bp1 becomes a COMPLEX breakend covering both
+                    // original intervals, bp2 is marked UNKNOWN
+                    SVCandidate singleSV = sv;
+                    singleSV.bp1.state = SVBreakendState::COMPLEX;
+                    singleSV.bp2.state = SVBreakendState::UNKNOWN;
+                    singleSV.bp1.interval.range.merge_range(sv.bp2.interval.range);
+
+#ifdef DEBUG_REFINER
+                    log_os << __FUNCTION__ << ": Candidate breakends regions are too close, transferring problem to local assembler\n";
+#endif
+
+                    getSmallSVAssembly(singleSV, isFindLargeInsertions, assemblyData);
+                    return;
+                }
+            }
+        }
+    }
+
+    assemblyData.isSpanning = true;
+    BPOrientation& bporient(assemblyData.bporient);
+
+    bporient.isBp1First = sv.isForward();
+    bporient.isStranded = sv.isStranded();
+    // NOTE(review): this and the aligner selection below read _opt.isRNA,
+    // while the reference extension above and the QC spans below use the
+    // isRNA argument -- presumably callers always pass _opt.isRNA, confirm
+    if (_opt.isRNA)
+    {
+        bporient.isBp1First = !sv.isForward(); // RNA-seq reads generate candidates in the opposite direction of the RNA
+    }
+    //
+    // based on sv candidate, we classify the expected relationship
+    // between the contig and the sv breakends:
+    //
+    if (sv.bp1.state != sv.bp2.state)
+    {
+        // if there's one right-open breakend and one left-open breakend, no matter the bp1/bp2 chromosome and
+        // relative bp1/bp2 order etc. we:
+        // 1. don't need to do any read/reference reversals
+        // 2. always treat the right-open breakend as the first alignment region in order:
+        //
+        if (sv.bp2.state == SVBreakendState::RIGHT_OPEN)
+        {
+            bporient.isBp2AlignedFirst = true;
+        }
+    }
+    else
+    {
+        // If both breakends open in the same direction, then:
+        // 1. the reads from one breakend need to be reversed
+        // 2. the reference from that same breakend needs to be reversed
+        // 3. Treat the un-reversed RIGHT_OPEN or reversed LEFT_OPEN as the first alignment region in order
+        //      Note that in the scheme below, we chose which bp to reverse so that no-reordering is required
+        //
+        if (sv.bp1.state == SVBreakendState::RIGHT_OPEN)
+        {
+            bporient.isBp2Reversed = true;
+        }
+        else
+        {
+            bporient.isBp1Reversed = true;
+        }
+    }
+
+    // there's always a small chance that our region could fall
+    // completely off the edge of the reference b/c of circular
+    // chromosomes, this can't be treated as a bug -- it's a
+    // legitimate breakend hypothesis that we just aren't setup to
+    // handle correctly, so we punt this case:
+    if (! isRefRegionValid(_header, sv.bp1.interval)) return;
+    if (! isRefRegionValid(_header, sv.bp2.interval)) return;
+
+    // next we extract the reference sequence around both breakends
+    //
+    // the 'trim' values below refer to the difference between the
+    // breakend reference region requested and the region returned
+    // after accounting for chromosome edges. The trim values will
+    // almost always be zero for large chromosomes.
+    //
+    unsigned bp1LeadingTrim;
+    unsigned bp1TrailingTrim;
+    unsigned bp2LeadingTrim;
+    unsigned bp2TrailingTrim;
+    getSVReferenceSegments(
+        _opt.referenceFilename, _header, extraRefSize, sv,
+        assemblyData.bp1ref, assemblyData.bp2ref,
+        bp1LeadingTrim, bp1TrailingTrim, bp2LeadingTrim, bp2TrailingTrim);
+
+    // the 'cut' values below represent sequence which will be removed
+    // from the edges of the reference region for each breakend.
+    //
+    // any sequence already trimmed at a chromosome edge counts against
+    // the split-size extension, so the cut never goes below zero:
+    pos_t align1LeadingCut(std::max(0,extraRefSplitSize - static_cast<pos_t>(bp1LeadingTrim)));
+    pos_t align1TrailingCut(std::max(0,extraRefSplitSize - static_cast<pos_t>(bp1TrailingTrim)));
+    pos_t align2LeadingCut(std::max(0,extraRefSplitSize - static_cast<pos_t>(bp2LeadingTrim)));
+    pos_t align2TrailingCut(std::max(0,extraRefSplitSize - static_cast<pos_t>(bp2TrailingTrim)));
+
+    // assemble contig(s) spanning the breakend:
+    _spanningAssembler.assembleSVBreakends(
+        sv.bp1, sv.bp2,
+        bporient.isBp1Reversed, bporient.isBp2Reversed,
+        assemblyData.bp1ref, assemblyData.bp2ref,
+        assemblyData.contigs);
+
+    // work on local copies of the reference strings so that they can be
+    // reverse-complemented without touching assemblyData:
+    std::string bp1refSeq = assemblyData.bp1ref.seq();
+    std::string bp2refSeq = assemblyData.bp2ref.seq();
+    if (bporient.isBp1Reversed)
+    {
+        // reversing the sequence exchanges its leading and trailing edges
+        reverseCompStr(bp1refSeq);
+        std::swap(align1LeadingCut, align1TrailingCut);
+    }
+    if (bporient.isBp2Reversed)
+    {
+        reverseCompStr(bp2refSeq);
+        std::swap(align2LeadingCut, align2TrailingCut);
+    }
+    const std::string* align1RefStrPtr(&bp1refSeq);
+    const std::string* align2RefStrPtr(&bp2refSeq);
+
+    // from this point on the 'align1'/'align2' names refer to alignment
+    // order, which is bp2-then-bp1 when isBp2AlignedFirst is set:
+    if (bporient.isBp2AlignedFirst)
+    {
+        std::swap(align1RefStrPtr, align2RefStrPtr);
+
+        std::swap(align1LeadingCut, align2LeadingCut);
+        std::swap(align1TrailingCut, align2TrailingCut);
+    }
+
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": al1RefSize/Seq: " << align1RefStrPtr->size() << '\n';
+    printSeq(*align1RefStrPtr,log_os);
+    log_os << '\n';
+    log_os << __FUNCTION__ << ": al2Refsize/Seq: " << align2RefStrPtr->size() << '\n';
+    printSeq(*align2RefStrPtr,log_os);
+    log_os << '\n';
+#endif
+
+    const unsigned contigCount(assemblyData.contigs.size());
+
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": contigCount: " << contigCount << "\n";
+    for (unsigned contigIndex(0); contigIndex<contigCount; ++contigIndex)
+    {
+        const AssembledContig& contig(assemblyData.contigs[contigIndex]);
+        log_os << __FUNCTION__ << ": contigIndex: " << contigIndex << " contig: " << contig;
+    }
+#endif
+
+    // make sure an alignment object exists for every contig, even if
+    // it's empty:
+    assemblyData.spanningAlignments.resize(contigCount);
+
+    // track the highest scoring contig among those passing the minimum
+    // span check below:
+    bool isHighScore(false);
+    unsigned highScoreIndex(0);
+
+    for (unsigned contigIndex(0); contigIndex<contigCount; ++contigIndex)
+    {
+        const AssembledContig& contig(assemblyData.contigs[contigIndex]);
+
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": start aligning contigIndex: " << contigIndex << "\n";
+#endif
+
+        JumpAlignmentResult<int>& alignment(assemblyData.spanningAlignments[contigIndex]);
+
+        if (_opt.isRNA)
+        {
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << " RNA alignment\n";
+#endif
+            // the RNA path aligns against kmer-masked copies of the cut
+            // references; exclBlocks1/2 are later used to translate the
+            // alignments back (see translateMaskedAlignment below)
+            static const int nSpacer(25);
+            std::vector<exclusion_block> exclBlocks1;
+            const std::string cutRef1 = kmerMaskReference(align1RefStrPtr->begin() + align1LeadingCut,
+                                                          align1RefStrPtr->end() - align1TrailingCut,
+                                                          contig.seq, nSpacer, exclBlocks1);
+            std::vector<exclusion_block> exclBlocks2;
+            const std::string cutRef2 = kmerMaskReference(align2RefStrPtr->begin() + align2LeadingCut,
+                                                          align2RefStrPtr->end() - align2TrailingCut,
+                                                          contig.seq, nSpacer, exclBlocks2);
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << " Kmer-masked references\n";
+            log_os << "\t ref Lengths " << align1RefStrPtr->size() << " " << align2RefStrPtr->size() << "\n";
+            log_os << "\t cutref Lengths " << cutRef1.size() << " " << cutRef2.size() << "\n";
+#endif
+
+            bool bp1RnaStrandFw; // Is the RNA fusion transcript on the forward strand at bp1
+            bool bp2RnaStrandFw;
+            if (bporient.isBp1First)
+            {
+                bp1RnaStrandFw = (sv.bp1.state == SVBreakendState::RIGHT_OPEN);
+                bp2RnaStrandFw = (sv.bp2.state == SVBreakendState::LEFT_OPEN);
+            }
+            else
+            {
+                bp1RnaStrandFw = (sv.bp1.state == SVBreakendState::LEFT_OPEN);
+                bp2RnaStrandFw = (sv.bp2.state == SVBreakendState::RIGHT_OPEN);
+            }
+            bool bp1Fw = (bporient.isBp1Reversed != bp1RnaStrandFw); //Should we look for the splice motif on the fw or rev strand in the bp1 ref seq
+            bool bp2Fw = (bporient.isBp2Reversed != bp2RnaStrandFw);
+            if (bporient.isBp2AlignedFirst) //bp1 and bp2 sequences have been swapped above
+                std::swap(bp1Fw, bp2Fw);
+
+
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << " isStranded: " << bporient.isStranded << "; bp1Fw: " << bp1Fw << " ; bp2Fw: " << bp2Fw << '\n';
+#endif
+            _RNASpanningAligner.align(contig.seq.begin(), contig.seq.end(),
+                                      cutRef1.begin(), cutRef1.end(), cutRef2.begin(), cutRef2.end(),
+                                      bp1Fw, bp2Fw, bporient.isStranded,
+                                      alignment);
+
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << " Masked 1: " << alignment.align1 << '\n';
+            log_os << __FUNCTION__ << " Masked 2: " << alignment.align2 << '\n';
+#endif
+            // if either sub-alignment can't be translated, drop both so
+            // that this contig fails the isAligned() check below:
+            if (!(translateMaskedAlignment(alignment.align1, exclBlocks1) &&
+                  translateMaskedAlignment(alignment.align2, exclBlocks2)))
+            {
+#ifdef DEBUG_REFINER
+                log_os << __FUNCTION__ << " Failed to fix kmer-masked alignment\n";
+#endif
+                alignment.align1.clear();
+                alignment.align2.clear();
+            }
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << " Fixed 1: " << alignment.align1 << '\n';
+            log_os << __FUNCTION__ << " Fixed 2: " << alignment.align2 << '\n';
+#endif
+        }
+        else
+        {
+#ifdef DEBUG_REFINER
+            // print the references exactly as they are passed to the
+            // aligner below: go through the align1/align2 pointers,
+            // because the cut values follow alignment order and bp1/bp2
+            // may have been swapped above (isBp2AlignedFirst)
+            log_os << __FUNCTION__ << " Ref1 for alignment: "
+                   << align1RefStrPtr->substr(align1LeadingCut, align1RefStrPtr->size()-align1LeadingCut-align1TrailingCut) << '\n';
+            log_os << __FUNCTION__ << " Ref2 for alignment: "
+                   << align2RefStrPtr->substr(align2LeadingCut, align2RefStrPtr->size()-align2LeadingCut-align2TrailingCut) << '\n';
+#endif
+            _spanningAligner.align(contig.seq.begin(), contig.seq.end(),
+                                   align1RefStrPtr->begin() + align1LeadingCut, align1RefStrPtr->end() - align1TrailingCut,
+                                   align2RefStrPtr->begin() + align2LeadingCut, align2RefStrPtr->end() - align2TrailingCut,
+                                   alignment);
+        }
+
+        // alignment was run against the cut reference, shift the start
+        // positions back into the coordinates of the uncut strings:
+        alignment.align1.beginPos += align1LeadingCut;
+        alignment.align2.beginPos += align2LeadingCut;
+
+        // the extended contig is built against the full (uncut)
+        // reference strings; one entry is appended per contig:
+        std::string extendedContig;
+        getExtendedContig(alignment, contig.seq, *align1RefStrPtr, *align2RefStrPtr, extendedContig);
+        assemblyData.extendedContigs.push_back(extendedContig);
+
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": contigIndex: " << contigIndex << " alignment: " << alignment;
+
+        std::string bp1Seq,bp2Seq,insertSeq;
+        getFwdStrandQuerySegments(alignment, contig.seq,
+                                  bporient.isBp2AlignedFirst, bporient.isBp1Reversed, bporient.isBp2Reversed,
+                                  bp1Seq, bp2Seq, insertSeq);
+        log_os << __FUNCTION__ << "\tbp1seq_fwd: " << bp1Seq << "\n";
+        log_os << __FUNCTION__ << "\tinsseq_fwd: " << insertSeq << "\n";
+        log_os << __FUNCTION__ << "\tbp2seq_fwd: " << bp2Seq << "\n";
+#endif
+
+        // QC the alignment to make sure it spans the two breakend locations:
+        static const unsigned minAlignRefSpan(20);
+        const bool isAlignment1Good(alignment.align1.isAligned() && (apath_ref_length(alignment.align1.apath) >= minAlignRefSpan));
+        const bool isAlignment2Good(alignment.align2.isAligned() && (apath_ref_length(alignment.align2.apath) >= minAlignRefSpan));
+        const bool isAlignmentGood(isAlignment1Good && isAlignment2Good);
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": Checking contig aln: " << contigIndex << "\n";
+#endif
+        if (! isAlignmentGood) continue;
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": contig okay: " << contigIndex << "\n";
+#endif
+        // strict '>' means the earliest contig wins a score tie:
+        if ((! isHighScore) || (alignment.score > assemblyData.spanningAlignments[highScoreIndex].score))
+        {
+            isHighScore = true;
+            highScoreIndex=contigIndex;
+        }
+    }
+
+    if (! isHighScore) return;
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": high scoring contig: " << highScoreIndex << "\n";
+#endif
+
+    // set any additional QC steps before deciding an alignment is
+    // usable:
+
+    {
+        // check the min size and fraction of optimal alignment score
+        // for each of the two sub-alignments on each side of the bp
+        //
+        // note this is done for multiple values -- the lower value is
+        // motivated by cases where a second breakpoint exists near to
+        // the target breakpoint -- the higher value is motivated by
+        // cases with some alignment 'messiness' near the breakpoint
+        // that stabilizes as we move farther away
+        //
+        /// TODO change iterative refspan to a single consistent alignment criteria
+        /// TODO should this be moved into the candidate selection loop?
+        //
+        const SVCandidateAssemblyData::JumpAlignmentResultType& hsAlign(assemblyData.spanningAlignments[highScoreIndex]);
+
+        // each flag stays true only if the sub-alignment is filtered at
+        // every span value, passing at any one span is sufficient:
+        bool isFilterAlign1(true);
+        bool isFilterAlign2(true);
+        static const unsigned spanSet[] = {75, 100, 200};
+        for (const unsigned maxQCRefSpan : spanSet)
+        {
+            const unsigned qcSpan1 = maxQCRefSpan + (isRNA ? apath_spliced_length(hsAlign.align1.apath) : 0);
+            if (! isFilterSpanningAlignment(qcSpan1, _spanningAligner, true, isRNA, hsAlign.align1.apath))
+            {
+                isFilterAlign1 = false;
+            }
+            const unsigned qcSpan2 = maxQCRefSpan + (isRNA ? apath_spliced_length(hsAlign.align2.apath) : 0);
+            if (! isFilterSpanningAlignment(qcSpan2, _spanningAligner, false, isRNA, hsAlign.align2.apath))
+            {
+                isFilterAlign2 = false;
+            }
+        }
+        if (isFilterAlign1 || isFilterAlign2) return;
+    }
+
+
+    // ok, passed QC -- mark the high-scoring alignment as usable for
+    // hypothesis refinement:
+    {
+        assemblyData.bestAlignmentIndex = highScoreIndex;
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": highscoreid: " << highScoreIndex << " alignment: " << assemblyData.spanningAlignments[highScoreIndex];
+#endif
+
+        // process the alignment into information that's easily usable
+        // in the vcf output (ie. breakends in reference coordinates)
+
+        // summarize usable output information in a second SVBreakend
+        // object -- this is the 'refined' sv:
+        assemblyData.svs.push_back(sv);
+        SVCandidate& newSV(assemblyData.svs.back());
+
+        generateRefinedVCFSVCandidateFromJumpAlignment(bporient, assemblyData, highScoreIndex, newSV);
+
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": highscore refined sv: " << newSV;
+#endif
+    }
+}
+
+
+
+void
+SVCandidateAssemblyRefiner::
+getSmallSVAssembly(
+    const SVCandidate& sv,
+    const bool isFindLargeInsertions,
+    SVCandidateAssemblyData& assemblyData) const
+{
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": START\n";
+#endif
+
+    assemblyData.isSpanning = false;
+
+    if (assemblyData.isCandidateSpanning)
+    {
+        _spanToComplexAssmRegions.addInterval(sv.bp1.interval);
+    }
+    else
+    {
+        // check if we've already assembled this region?
+        if (_spanToComplexAssmRegions.isSubsetOfRegion(sv.bp1.interval))
+        {
+            assemblyData.isOverlapSkip = true;
+            return;
+        }
+    }
+
+    // how much additional reference sequence should we extract from
+    // around each side of the breakend region?
+    //
+    // see extended description in getJumpAssembly
+    static const pos_t extraRefEdgeSize(700);
+
+    // how much reference should we additionally extract for split
+    // read alignment, but not for variant-discovery alignment?
+    //
+    // see extended description in getJumpAssembly
+    static const pos_t extraRefSplitSize(100);
+
+    static const pos_t extraRefSize(extraRefEdgeSize+extraRefSplitSize);
+
+#ifdef ITERATIVE_ASSEMBLER
+    static const float extraVarSizePerc(0.1f);
+    static const float extraSuppReadPerc(0.2f);
+#endif
+
+    // min alignment context
+    //const unsigned minAlignContext(4);
+
+    // there's always a small chance that our region could fall
+    // completely off the edge of the reference.  b/c of circular
+    // genomes, this can't be treated as a bug -- it's a legitimate
+    // breakend hypothesis that we just aren't setup to handle
+    // correctly:
+    if (! isRefRegionValid(_header, sv.bp1.interval)) return;
+
+    unsigned leadingTrim;
+    unsigned trailingTrim;
+    getIntervalReferenceSegment(_opt.referenceFilename, _header, extraRefSize, sv.bp1.interval, assemblyData.bp1ref, leadingTrim, trailingTrim);
+
+    // in most cases, these values should equal extraRefSplitSize,
+    // sometimes they're forced to be shorter b/c we didn't retrieve as much reference sequence as targeted:
+    const pos_t maxLeadingCut(std::max(0, extraRefSize - static_cast<pos_t>(leadingTrim)));
+    const pos_t maxTrailingCut(std::max(0, extraRefSize - static_cast<pos_t>(trailingTrim)));
+    const pos_t leadingCut(std::max(0, maxLeadingCut - extraRefEdgeSize));
+    const pos_t trailingCut(std::max(0, maxTrailingCut - extraRefEdgeSize));
+
+    const std::string& align1RefStr(assemblyData.bp1ref.seq());
+
+    const bool isSearchRemoteInsertionReads((! _opt.isSkipRemoteReads) && isFindLargeInsertions);
+
+    // assemble contigs in the breakend region
+    _smallSVAssembler.assembleSingleSVBreakend(sv.bp1, assemblyData.bp1ref, isSearchRemoteInsertionReads, assemblyData.remoteReads, assemblyData.contigs);
+
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": align1RefSize/Seq: " << align1RefStr.size() << '\n';
+    printSeq(align1RefStr,log_os);
+    log_os << '\n';
+#endif
+
+    const unsigned contigCount(assemblyData.contigs.size());
+
+#ifdef DEBUG_REFINER
+    log_os << __FUNCTION__ << ": contigCount: " << contigCount << '\n';
+    for (unsigned contigIndex(0); contigIndex<contigCount; ++contigIndex)
+    {
+        const AssembledContig& contig(assemblyData.contigs[contigIndex]);
+        log_os << __FUNCTION__ << ": contigIndex: " << contigIndex << " contig: " << contig;
+    }
+#endif
+
+    // make sure an alignment object exists for every contig, even if
+    // it's empty:
+    assemblyData.smallSVAlignments.resize(contigCount);
+    assemblyData.smallSVSegments.resize(contigCount);
+    assemblyData.largeInsertInfo.resize(contigCount);
+    assemblyData.extendedContigs.resize(contigCount);
+
+    bool isHighScore(false);
+    unsigned highScoreIndex(0);
+
+#ifdef ITERATIVE_ASSEMBLER
+    bool isSecHighScore(false);
+    unsigned highScoreVarSize(0);
+    unsigned secHighScoreIndex(0);
+    unsigned secHighScoreVarSize(0);
+#endif
+
+    std::vector<unsigned> largeInsertionCandidateIndex;
+
+    for (unsigned contigIndex(0); contigIndex<contigCount; ++contigIndex)
+    {
+        const AssembledContig& contig(assemblyData.contigs[contigIndex]);
+
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": start aligning contigIndex: " << contigIndex << '\n';
+#endif
+
+
+        // try out two different aligners, one optimized to find large
+        // events and one optimized to find smaller events, only look
+        // for small events if the large event search fails:
+        SVCandidateAssemblyData::SmallAlignmentResultType& alignment(assemblyData.smallSVAlignments[contigIndex]);
+        std::string& extendedContig(assemblyData.extendedContigs[contigIndex]);
+        std::vector<std::pair<unsigned,unsigned> >& candidateSegments(assemblyData.smallSVSegments[contigIndex]);
+        candidateSegments.clear();
+
+        // remove candidate from consideration unless we find a
+        // sufficiently large indel with good flanking sequence:
+        bool isSmallSVCandidate(false);
+
+        // come up with a more intelligent reference span limit:
+        pos_t adjustedLeadingCut(leadingCut);
+        pos_t adjustedTrailingCut(trailingCut);
+        {
+            // for all kmers in the contig, find the earliest and
+            // latest kmer match in the reference, limit reference
+            // to these points (but not less than the original breakend region)
+            //
+            // pick low k because this is just a simple runtime optimization
+            static const int merSize(10);
+            std::unordered_set<std::string> contigHash;
+            const unsigned contigSize(contig.seq.size());
+            for (unsigned contigMerIndex(0); contigMerIndex<(contigSize-(merSize-1)); ++contigMerIndex)
+            {
+                contigHash.insert(contig.seq.substr(contigMerIndex,merSize));
+            }
+
+            const pos_t refSize(align1RefStr.size());
+            const pos_t minRefIndex(leadingCut);
+            const pos_t maxRefIndex(refSize-(trailingCut+merSize));
+
+            const pos_t maxFwdRefIndex(std::min(maxLeadingCut, maxRefIndex));
+            pos_t refIndex=minRefIndex;
+            for (refIndex=minRefIndex; refIndex<=maxFwdRefIndex; refIndex++)
+            {
+                if (contigHash.count(align1RefStr.substr(refIndex,merSize)) != 0) break;
+            }
+            adjustedLeadingCut=refIndex;
+
+            const pos_t minRevRefIndex(std::max(minRefIndex,refSize-maxTrailingCut));
+            for (refIndex=(maxRefIndex); refIndex>=minRevRefIndex; refIndex--)
+            {
+                if (contigHash.count(align1RefStr.substr(refIndex,merSize)) != 0) break;
+            }
+            adjustedTrailingCut=(refSize-(refIndex+merSize));
+        }
+
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << " Ref for alignment: "
+               << align1RefStr.substr(adjustedLeadingCut, align1RefStr.size()-adjustedLeadingCut-adjustedTrailingCut) << '\n';
+#endif
+        ;
+
+        // start with largeSV aligner
+        {
+            _largeSVAligner.align(
+                contig.seq.begin(), contig.seq.end(),
+                align1RefStr.begin() + adjustedLeadingCut, align1RefStr.end() - adjustedTrailingCut,
+                alignment);
+
+            alignment.align.beginPos += adjustedLeadingCut;
+            getExtendedContig(alignment, contig.seq, align1RefStr, extendedContig);
+
+
+            // trial two different flanking test sizes, this way we
+            // account for multiple neighboring noise scenarios
+            //
+            static const unsigned spanSet[] = {100,200};
+            for (const unsigned maxQCRefSpan : spanSet)
+            {
+                static const bool isLargeOnly(true);
+
+                std::vector<std::pair<unsigned,unsigned> > segments;
+                const bool isCandidate( isSmallAssemblerSVAlignment(
+                                            maxQCRefSpan,
+                                            _largeSVAligner,
+                                            alignment.align,
+                                            contig.seq,
+                                            align1RefStr,
+                                            _opt.scanOpt.minCandidateVariantSize,
+                                            segments,
+                                            isLargeOnly) );
+
+                if (isCandidate)
+                {
+                    // in case both ref spans are accepted take the one with the larger segment count:
+                    if (segments.size() > candidateSegments.size())
+                    {
+                        candidateSegments = segments;
+                    }
+                    isSmallSVCandidate=true;
+                }
+            }
+
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << ": finished largeAligner. contigIndex: " << contigIndex
+                   << " isSmallSVCandidate " << isSmallSVCandidate
+                   << " alignment: " << alignment;
+#endif
+        }
+
+
+        // didn't find anything? try again focused on smaller events:
+        /// TODO: get rid of this step
+        if (! isSmallSVCandidate)
+        {
+            _smallSVAligner.align(
+                contig.seq.begin(), contig.seq.end(),
+                align1RefStr.begin() + adjustedLeadingCut, align1RefStr.end() - adjustedTrailingCut,
+                alignment);
+
+            alignment.align.beginPos += adjustedLeadingCut;
+            getExtendedContig(alignment, contig.seq, align1RefStr, extendedContig);
+
+            // trial two different flanking test sizes, this way we
+            // account for multiple neighboring noise scenarios
+            //
+            static const unsigned spanSet[] = {100,200};
+            for (const unsigned maxQCRefSpan : spanSet)
+            {
+                std::vector<std::pair<unsigned,unsigned> > segments;
+                const bool isCandidate( isSmallAssemblerSVAlignment(
+                                            maxQCRefSpan,
+                                            _smallSVAligner,
+                                            alignment.align,
+                                            contig.seq,
+                                            align1RefStr,
+                                            _opt.scanOpt.minCandidateVariantSize,
+                                            segments) );
+
+                if (isCandidate)
+                {
+                    // in case both ref spans are accepted take the
+                    // one with the larger segment count:
+                    if (segments.size() > candidateSegments.size())
+                    {
+                        candidateSegments = segments;
+                    }
+                    isSmallSVCandidate=true;
+                }
+            }
+
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << ": finished smallAligner. contigIndex: " << contigIndex
+                   << " isSmallSVCandidate " << isSmallSVCandidate
+                   << " alignment: " << alignment;
+#endif
+        }
+
+
+        // test each alignment for suitability to be the left or right
+        // side of a large insertion:
+        //
+        // all practical combinations of left and right candidates
+        // will be enumerated below to see if there's a good fit:
+        //
+        if (isFindLargeInsertions)
+        {
+            LargeInsertionInfo& candidateInsertInfo(assemblyData.largeInsertInfo[contigIndex]);
+            candidateInsertInfo.clear();
+
+            LargeInsertionInfo insertInfo;
+#ifdef DEBUG_CONTIG
+            log_os << __FUNCTION__ << ": contig length: " << contig.seq.size() << "\n"
+                   << __FUNCTION__ << ": contig seq: " << contig.seq << "\n"
+                   << __FUNCTION__ << ": trim contig start offset: " << contig.conservativeRange.begin_pos() << "\n"
+                   << __FUNCTION__ << ": trim contig end offset: " << contig.conservativeRange.end_pos() << "\n";
+#endif
+            ALIGNPATH::path_t apath_conservative(alignment.align.apath);
+            apath_limit_read_length(contig.conservativeRange,apath_conservative);
+
+            bool isCandidate( isLargeInsertAlignment(
+                                  _largeInsertEdgeAligner,
+                                  apath_conservative,
+                                  insertInfo));
+
+            if (isCandidate)
+            {
+                // if passed, then get corrected insertInfo without
+                // using conservativeRange:
+                LargeInsertionInfo insertInfo2;
+                isCandidate = isLargeInsertAlignment(
+                                  _largeInsertEdgeAligner,
+                                  alignment.align.apath,
+                                  insertInfo2);
+
+                if ((insertInfo.isLeftCandidate != insertInfo2.isLeftCandidate)
+                    || (insertInfo.isRightCandidate != insertInfo2.isRightCandidate))
+                {
+                    isCandidate = false;
+                }
+
+                insertInfo.contigOffset = insertInfo2.contigOffset;
+                insertInfo.refOffset = insertInfo2.refOffset;
+            }
+
+            if (isCandidate)
+            {
+                candidateInsertInfo=insertInfo;
+#ifdef DEBUG_REFINER
+                log_os << __FUNCTION__ << ": inserting large insertion candidation: " << candidateInsertInfo << "\n";
+#endif
+                largeInsertionCandidateIndex.push_back(contigIndex);
+            }
+        }
+
+        if (isSmallSVCandidate)
+        {
+            // keep the top two highest scoring QC'd candidate:
+            // TODO: we should keep all QC'd candidates for the small event case
+            // FIXME : prevents us from finding overlapping events, keep vector of high-scoring contigs?
+            if (! isHighScore)
+            {
+#ifdef DEBUG_REFINER
+                log_os << __FUNCTION__ << ": contigIndex: " << contigIndex << " is high score\n";
+#endif
+                isHighScore = true;
+                highScoreIndex=contigIndex;
+#ifdef ITERATIVE_ASSEMBLER
+                highScoreVarSize = getLargestIndelSize(alignment.align.apath, candidateSegments);
+#endif
+            }
+            else if (alignment.score > assemblyData.smallSVAlignments[highScoreIndex].score)
+            {
+                highScoreIndex = contigIndex;
+
+#ifdef DEBUG_REFINER
+                log_os << __FUNCTION__ << ": contigIndex: " << highScoreIndex << " is high score\n";
+#endif
+
+#ifdef ITERATIVE_ASSEMBLER
+                isSecHighScore = true;
+                secHighScoreIndex = highScoreIndex;
+                secHighScoreVarSize = highScoreVarSize;
+                highScoreVarSize = getLargestIndelSize(alignment.align.apath, candidateSegments);
+
+#ifdef DEBUG_REFINER
+                log_os << __FUNCTION__ << ": contigIndex: " << secHighScoreIndex << " is the second high score\n";
+#endif
+#endif
+            }
+#ifdef ITERATIVE_ASSEMBLER
+            else if ((! isSecHighScore) || (alignment.score > assemblyData.smallSVAlignments[secHighScoreIndex].score))
+            {
+                isSecHighScore = true;
+                secHighScoreIndex = contigIndex;
+                secHighScoreVarSize = getLargestIndelSize(alignment.align.apath, candidateSegments);
+#ifdef DEBUG_REFINER
+                log_os << __FUNCTION__ << ": contigIndex: " << secHighScoreIndex << " is the second high score\n";
+#endif
+            }
+#endif
+        }
+    }
+
+#ifdef ITERATIVE_ASSEMBLER
+    // select the contig with the larger indel size between the two
+    // highest-scoring contigs
+    if (isSecHighScore)
+    {
+        const unsigned highScoreSuppReads = assemblyData.contigs[highScoreIndex].supportReads.size();
+        const unsigned secHighScoreSuppReads = assemblyData.contigs[secHighScoreIndex].supportReads.size();
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": contig #" << highScoreIndex << "has " << highScoreSuppReads
+               <<" support reads, with max variant size " << highScoreVarSize;
+        log_os << __FUNCTION__ << ": contig #" << secHighScoreIndex << "has " << secHighScoreSuppReads
+               <<" support reads, with max variant size " << secHighScoreVarSize;
+#endif
+
+        const bool secondIsBest((secHighScoreSuppReads > highScoreSuppReads *(1+extraSuppReadPerc)) ||
+                                (secHighScoreVarSize > highScoreVarSize * (1+extraVarSizePerc)));
+        if (secondIsBest) highScoreIndex = secHighScoreIndex;
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": contigIndex: " << highScoreIndex << " is finally selected.\n";
+#endif
+    }
+#endif
+
+
+    // set any additional QC steps before deciding an alignment is usable:
+    // TODO:
+    std::set<pos_t> insPos;
+
+    // finished QC, skip small deletions if no candidates have appeared:
+    if (isHighScore)
+    {
+        assemblyData.bestAlignmentIndex = highScoreIndex;
+#ifdef DEBUG_REFINER
+        log_os << __FUNCTION__ << ": highscoreid: " << highScoreIndex << " alignment: " << assemblyData.smallSVAlignments[highScoreIndex];
+#endif
+
+        // process the alignment into information that's easily usable
+        // in the vcf output (ie. breakends in reference coordinates)
+
+        const AssembledContig& bestContig(assemblyData.contigs[assemblyData.bestAlignmentIndex]);
+        const SVCandidateAssemblyData::SmallAlignmentResultType& bestAlign(assemblyData.smallSVAlignments[assemblyData.bestAlignmentIndex]);
+
+        const SVCandidateAssemblyData::CandidateSegmentSetType& candidateSegments(assemblyData.smallSVSegments[assemblyData.bestAlignmentIndex]);
+        unsigned segmentIndex = 0;
+        for (const SVCandidateAssemblyData::CandidateSegmentType& segRange : candidateSegments)
+        {
+            // copy the low-res candidate sv and start customizing:
+            assemblyData.svs.push_back(sv);
+
+            SVCandidate& newSV(assemblyData.svs.back());
+            newSV.assemblyAlignIndex = assemblyData.bestAlignmentIndex;
+            newSV.assemblySegmentIndex = segmentIndex;
+            setSmallCandSV(assemblyData.bp1ref, bestContig.seq, bestAlign.align, segRange, newSV);
+            segmentIndex++;
+
+            // provide a weak filter to keep fully and partially
+            // assembled duplicates of the same event from occurring:
+            if (getExtendedSVType(newSV) == EXTENDED_SV_TYPE::INSERT)
+            {
+                insPos.insert(newSV.bp1.interval.range.begin_pos());
+            }
+
+#ifdef DEBUG_REFINER
+            log_os << __FUNCTION__ << ": small refined sv: " << newSV;
+#endif
+
+#ifdef DEBUG_CONTIG
+            const int contigSize = bestContig.seq.length();
+            const ALIGNPATH::path_t apathTillSvStart(&bestAlign.align.apath[0], &bestAlign.align.apath[segRange.first]);
+            const ALIGNPATH::path_t apathTillSvEnd(&bestAlign.align.apath[0], &bestAlign.align.apath[segRange.second+1]);
+            const int leftSize = apath_read_length(apathTillSvStart);
+            const int endPos = apath_read_length(apathTillSvEnd);
+            const int rightSize = contigSize - apath_read_length(apathTillSvEnd);
+
+            log_os << __FUNCTION__ << ": contig has size " << contigSize << ": " << bestContig.seq << "\n";
+            log_os << __FUNCTION__ << ": left part has size " << leftSize << ": " << bestContig.seq.substr(0, leftSize) << "\n";
+            log_os << __FUNCTION__ << ": right part has size " << rightSize << ": " << bestContig.seq.substr(endPos, rightSize) << "\n";
+#endif
+        }
+    }
+
+    // search for large deletions with incomplete insertion sequence
+    // assembly:
+    {
+        // In case of no fully-assembled candidate, solve for any
+        // strong large insertion candidate
+        //
+        // This is done by searching through combinations of the left
+        // and right insertion side candidates found in the primary
+        // contig processing loop
+        if (isFindLargeInsertions)
+        {
+            processLargeInsertion(sv, leadingCut, trailingCut, _largeInsertCompleteAligner, largeInsertionCandidateIndex, insPos, assemblyData);
+        }
+    }
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVCandidateAssemblyRefiner.hh b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateAssemblyRefiner.hh
new file mode 100644
index 0000000..fe12393
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateAssemblyRefiner.hh
@@ -0,0 +1,102 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include "GSCOptions.hh"
+#include "EdgeRuntimeTracker.hh"
+
+#include "alignment/GlobalAligner.hh"
+#include "alignment/GlobalLargeIndelAligner.hh"
+#include "alignment/GlobalJumpAligner.hh"
+#include "alignment/GlobalJumpIntronAligner.hh"
+#include "htsapi/bam_header_info.hh"
+#include "manta/SVCandidateAssemblyData.hh"
+#include "manta/SVCandidate.hh"
+#include "manta/SVCandidateAssembler.hh"
+#include "options/SmallAssemblerOptions.hh"
+#include "svgraph/GenomeIntervalTracker.hh"
+
+
+/// \brief methods to improve low-resolution SVCandidates via assembly and contig alignment
+/// The public entry point is getCandidateAssemblyData(); getJumpAssembly() and getSmallSVAssembly() are the private large and small SV assemblers
+struct SVCandidateAssemblyRefiner
+{
+    SVCandidateAssemblyRefiner(
+        const GSCOptions& opt,
+        const bam_header_info& header,
+        const AllCounts& counts,
+        EdgeRuntimeTracker& edgeTracker);
+
+    /// \brief add assembly and assembly post-processing data to SV candidate
+    /// \param[out] assemblyData receives the assembly results (callers read e.g. assemblyData.svs afterwards)
+    /// \param[in] isRNA if true add intron logic to the contig jump aligner
+    /// \param[in] isFindLargeInsertions if true search for insertions which can't be completely assembled, and conduct more expensive search for assembly insertion evidence
+    void
+    getCandidateAssemblyData(
+        const SVCandidate& sv,
+        const SVCandidateSetData& svData,
+        const bool isRNA,
+        const bool isFindLargeInsertions,
+        SVCandidateAssemblyData& assemblyData) const;
+    /// \brief empty _spanToComplexAssmRegions (SVCandidateProcessor::evaluateCandidates calls this before evaluating the candidates of an edge)
+    void
+    clearEdgeData()
+    {
+        _spanToComplexAssmRegions.clear();
+    }
+
+private:
+
+    /// large SV assembler (takes the same isRNA/isFindLargeInsertions flags as getCandidateAssemblyData)
+    void
+    getJumpAssembly(
+        const SVCandidate& sv,
+        const bool isRNA,
+        const bool isFindLargeInsertions,
+        SVCandidateAssemblyData& assemblyData) const;
+
+    /// small SV/indel assembler
+    ///
+    /// \param[in] isFindLargeInsertions if true search for insertions which can't be completely assembled, and conduct more expensive search for assembly insertion evidence
+    ///
+    void
+    getSmallSVAssembly(
+        const SVCandidate& sv,
+        const bool isFindLargeInsertions,
+        SVCandidateAssemblyData& assemblyData) const;
+
+    //////////////////////////////// data:
+    const GSCOptions& _opt;  ///< held by reference: the options object must outlive this refiner
+    const bam_header_info& _header;  ///< held by reference: the header object must outlive this refiner
+    const SVCandidateAssembler _smallSVAssembler;  ///< NOTE(review): presumably the assembler behind getSmallSVAssembly() -- confirm
+    const SVCandidateAssembler _spanningAssembler;  ///< NOTE(review): presumably the assembler behind getJumpAssembly() -- confirm
+    const GlobalAligner<int> _smallSVAligner;
+    const GlobalLargeIndelAligner<int> _largeSVAligner;
+    const GlobalAligner<int> _largeInsertEdgeAligner;
+    const GlobalAligner<int> _largeInsertCompleteAligner;  ///< handed to processLargeInsertion() when isFindLargeInsertions is set
+    const GlobalJumpAligner<int> _spanningAligner;
+    const GlobalJumpIntronAligner<int> _RNASpanningAligner;  ///< NOTE(review): presumably used in place of _spanningAligner when isRNA is true -- confirm
+    mutable GenomeIntervalTracker _spanToComplexAssmRegions;  ///< emptied by clearEdgeData(); declared mutable so that const methods are able to modify it
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVCandidateProcessor.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateProcessor.cpp
new file mode 100644
index 0000000..e10b03d
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateProcessor.cpp
@@ -0,0 +1,574 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SVCandidateProcessor.hh"
+#include "manta/SVMultiJunctionCandidateUtil.hh"
+#include "svgraph/EdgeInfoUtil.hh"
+
+#include "blt_util/log.hh"
+
+#include <iostream>
+
+
+//#define DEBUG_GSV
+
+
+
+SVWriter::
+SVWriter(
+    const GSCOptions& initOpt,
+    const SVLocusScanner& readScanner,
+    const SVLocusSet& cset,
+    const char* progName,
+    const char* progVersion) :
+    opt(initOpt),
+    isSomatic(! opt.somaticOutputFilename.empty()),
+    isTumorOnly(! opt.tumorOutputFilename.empty()),
+    svScore(opt, readScanner, cset.header),
+    candfs(opt.candidateOutputFilename),
+    dipfs(opt.diploidOutputFilename),
+    somfs(opt.somaticOutputFilename),
+    tumfs(opt.tumorOutputFilename),
+    candWriter(opt.referenceFilename, opt.isRNA, cset, candfs.getStream()),
+    diploidWriter(opt.diploidOpt, (! opt.chromDepthFilename.empty()),
+                  opt.referenceFilename, opt.isRNA, cset, dipfs.getStream()),
+    somWriter(opt.somaticOpt, (! opt.chromDepthFilename.empty()),
+              opt.referenceFilename, cset, somfs.getStream()),
+    tumorWriter(opt.tumorOpt, (! opt.chromDepthFilename.empty()),
+                opt.referenceFilename, cset, tumfs.getStream())
+{
+    // headers are written only when this process handles edge bin 0:
+    if (0 != opt.edgeOpt.binIndex) return;
+
+    // the candidate writer is given an empty sample name list:
+    std::vector<std::string> emptySampleNames;
+    candWriter.writeHeader(progName, progVersion, emptySampleNames);
+
+    const std::vector<std::string>& allSampleNames(svScore.sampleNames());
+    if (isTumorOnly)
+    {
+        tumorWriter.writeHeader(progName, progVersion, allSampleNames);
+        return;
+    }
+
+    // the diploid writer is given only the first svScore.diploidSampleCount() sample names:
+    std::vector<std::string> diploidSampleNames(
+        allSampleNames.begin(), allSampleNames.begin()+svScore.diploidSampleCount());
+    diploidWriter.writeHeader(progName, progVersion, diploidSampleNames);
+    if (isSomatic) somWriter.writeHeader(progName, progVersion, allSampleNames);
+}
+
+
+
+/// \brief true if at least one element of \p vb is false (hence false for an empty vector)
+static
+bool
+isAnyFalse(
+    const std::vector<bool>& vb)
+{
+    std::vector<bool>::const_iterator flagIter(vb.begin());
+    while ((flagIter != vb.end()) && (*flagIter)) ++flagIter;
+    // stopping before the end means a false entry was reached:
+    return (flagIter != vb.end());
+}
+
+
+
+/// \brief filter the junctions of a multi-junction SV candidate, write the survivors to the candidate output, then (unless opt.isSkipScoring) score them and write the scored records
+/// \param[in] isInputJunctionFiltered per-junction flags, a true entry marks a junction as already filtered; the filters below are applied to a local copy
+/// \param[in,out] svSupports handed on to svScore.scoreSV()
+void
+SVWriter::
+writeSV(
+    const EdgeInfo& edge,
+    const SVCandidateSetData& svData,
+    const std::vector<SVCandidateAssemblyData>& mjAssemblyData,
+    const SVMultiJunctionCandidate& mjSV,
+    const std::vector<bool>& isInputJunctionFiltered,
+    SupportSamples& svSupports)
+{
+    const unsigned junctionCount(mjSV.junction.size());
+    const unsigned minJunctionCandidateSpanningCount(std::min(2u,opt.minCandidateSpanningCount));
+
+    // track filtration for each junction:
+    std::vector<bool> isJunctionFiltered(isInputJunctionFiltered);
+
+    // early SV filtering:
+    //
+    // 2 junction filter types:
+    // 1) tests where the junction can fail independently
+    // 2) tests where all junctions have to fail for the candidate to be filtered:
+
+    bool isCandidateSpanFail(true);
+
+    for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+    {
+        const SVCandidateAssemblyData& assemblyData(mjAssemblyData[junctionIndex]);
+        const SVCandidate& sv(mjSV.junction[junctionIndex]);
+
+        const bool isCandidateSpanning(assemblyData.isCandidateSpanning);
+#ifdef DEBUG_GSV
+        log_os << __FUNCTION__ << ": isSpanningSV junction: " <<  isCandidateSpanning << "\n";
+#endif
+
+        // junction dependent tests:
+        //   (1) at least one junction in the set must have spanning count of opt.minCandidateSpanningCount or more
+        bool isJunctionSpanFail(false);
+        if (isCandidateSpanning)
+        {
+            if (sv.getPostAssemblySpanningCount() < opt.minCandidateSpanningCount)
+            {
+                isJunctionSpanFail=true;
+            }
+        }
+        if (! isJunctionSpanFail) isCandidateSpanFail=false;
+
+        // independent tests -- as soon as one of these fails, we can continue:
+        //   (1) each spanning junction in the set must have spanning count of
+        //      minJunctionCandidateSpanningCount or more
+        //   (2) no unassembled non-spanning candidates
+        if (isCandidateSpanning)
+        {
+            if (sv.getPostAssemblySpanningCount() < minJunctionCandidateSpanningCount)
+            {
+                isJunctionFiltered[junctionIndex] = true;
+                continue;
+            }
+        }
+        else
+        {
+            if (sv.isImprecise())
+            {
+                // in this case a non-spanning low-res candidate went into assembly but
+                // did not produce a successful contig alignment:
+#ifdef DEBUG_GSV
+                log_os << __FUNCTION__ << ": Rejecting candidate junction: imprecise non-spanning SV\n";
+#endif
+                isJunctionFiltered[junctionIndex] = true;
+                continue;
+            }
+        }
+
+        // check min size for candidate output:
+        if (isSVBelowMinSize(sv,opt.scanOpt.minCandidateVariantSize))
+        {
+#ifdef DEBUG_GSV
+            log_os << __FUNCTION__ << ": Filtering out candidate below min size before candidate output stage\n";
+#endif
+            isJunctionFiltered[junctionIndex] = true;
+            continue;
+        }
+    }
+
+    // revisit dependent tests:
+    //
+    if (isCandidateSpanFail)
+    {
+        for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+        {
+#ifdef DEBUG_GSV
+            log_os << __FUNCTION__ << ": Rejecting candidate junction: minCandidateSpanningCount\n";
+#endif
+            isJunctionFiltered[junctionIndex] = true;
+        }
+    }
+
+    // check to see if all junctions are filtered, if so skip the whole candidate:
+    //
+    if (! isAnyFalse(isJunctionFiltered))
+    {
+#ifdef DEBUG_GSV
+        log_os << __FUNCTION__ << ": Rejecting candidate, all junctions filtered.\n";
+#endif
+        return;
+    }
+
+    std::vector<SVId> junctionSVId(junctionCount);
+
+    // write out candidates for each junction independently:
+    for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+    {
+        if (isJunctionFiltered[junctionIndex]) continue;
+
+        const SVCandidateAssemblyData& assemblyData(mjAssemblyData[junctionIndex]);
+        const SVCandidate& sv(mjSV.junction[junctionIndex]);
+        SVId& svId(junctionSVId[junctionIndex]);
+        _idgen.getId(edge, sv, opt.isRNA, svId);
+        candWriter.writeSV(svData, assemblyData, sv, svId);
+    }
+
+    if (opt.isSkipScoring) return;
+
+    // check min size for scoring:
+    for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+    {
+        if (isJunctionFiltered[junctionIndex]) continue;
+
+        const SVCandidate& sv(mjSV.junction[junctionIndex]);
+        if (isSVBelowMinSize(sv, opt.minScoredVariantSize))
+        {
+#ifdef DEBUG_GSV
+            log_os << __FUNCTION__ << ": Filtering out candidate junction below min size at scoring stage\n";
+#endif
+            isJunctionFiltered[junctionIndex] = true;
+        }
+    }
+
+    // check to see if all junctions are filtered before scoring:
+    //
+    if (! isAnyFalse(isJunctionFiltered)) return;
+
+    bool isMJEvent(false);
+
+    SVModelScoreInfo mjJointModelScoreInfo;
+    mjJointModelScoreInfo.setSampleCount(svScore.sampleCount(),svScore.diploidSampleCount());
+    svScore.scoreSV(svData, mjAssemblyData, mjSV, junctionSVId,
+                    isJunctionFiltered, isSomatic, isTumorOnly,
+                    mjModelScoreInfo, mjJointModelScoreInfo,
+                    isMJEvent, svSupports);
+
+    // a false entry marks a junction which passed every filter above, so those are the ones to count:
+    const unsigned unfilteredJunctionCount(std::count(isJunctionFiltered.begin(),isJunctionFiltered.end(),false));
+
+    // setup all event-level info that we need to share across all event junctions:
+    bool isMJDiploidEvent(isMJEvent);
+    EventInfo event;
+    event.junctionCount=unfilteredJunctionCount;
+    bool isMJEventWriteDiploid(false);
+    bool isMJEventWriteSomatic(false);
+
+    if (isMJEvent)
+    {
+        for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+        {
+            if (isJunctionFiltered[junctionIndex]) continue;
+
+            if (event.label.empty())
+            {
+                const SVId& svId(junctionSVId[junctionIndex]);
+                event.label = svId.localId;
+            }
+
+            const SVModelScoreInfo& modelScoreInfo(mjModelScoreInfo[junctionIndex]);
+
+            // for diploid case only we decide to use multi-junction or single junction based on best score:
+            // (for somatic case a lower somatic score could be due to reference evidence in an event member)
+            //
+            if (mjJointModelScoreInfo.diploid.filters.size() > modelScoreInfo.diploid.filters.size())
+            {
+                isMJDiploidEvent=false;
+            }
+            else if (mjJointModelScoreInfo.diploid.altScore < modelScoreInfo.diploid.altScore)
+            {
+                isMJDiploidEvent=false;
+            }
+        }
+
+        // for events, we write all junctions, or no junctions, so we need to determine write status over
+        // the whole set rather than a single junction:
+        for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+        {
+            const SVModelScoreInfo& modelScoreInfo(mjModelScoreInfo[junctionIndex]);
+            // NOTE(review): unlike the loop above, filtered junctions are not skipped here -- confirm this is intended
+            if (isMJDiploidEvent)
+            {
+                if ((mjJointModelScoreInfo.diploid.altScore >= opt.diploidOpt.minOutputAltScore) ||
+                    (modelScoreInfo.diploid.altScore >= opt.diploidOpt.minOutputAltScore))
+                {
+                    isMJEventWriteDiploid = true;
+                }
+            }
+
+            if ((mjJointModelScoreInfo.somatic.somaticScore >= opt.somaticOpt.minOutputSomaticScore) ||
+                (modelScoreInfo.somatic.somaticScore >= opt.somaticOpt.minOutputSomaticScore))
+            {
+                isMJEventWriteSomatic = true;
+            }
+
+            //TODO: set up criteria for isMJEventWriteTumor
+        }
+    }
+
+    // final scored output is treated (mostly) independently for each junction:
+    //
+    for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+    {
+        if (isJunctionFiltered[junctionIndex]) continue;
+
+        const SVCandidateAssemblyData& assemblyData(mjAssemblyData[junctionIndex]);
+        const SVCandidate& sv(mjSV.junction[junctionIndex]);
+        const SVModelScoreInfo& modelScoreInfo(mjModelScoreInfo[junctionIndex]);
+
+        const SVId& svId(junctionSVId[junctionIndex]);
+        const SVScoreInfo& baseInfo(modelScoreInfo.base);
+        static const EventInfo nonEvent;
+
+        if (isTumorOnly)
+        {
+            //TODO: add logic for MJEvent
+
+            const SVScoreInfoTumor& tumorInfo(modelScoreInfo.tumor);
+            tumorWriter.writeSV(svData, assemblyData, sv, svId, baseInfo, tumorInfo, nonEvent);
+        }
+        else
+        {
+            {
+                const EventInfo& diploidEvent( isMJDiploidEvent ? event : nonEvent );
+                const SVModelScoreInfo& scoreInfo(isMJDiploidEvent ? mjJointModelScoreInfo : modelScoreInfo);
+                const SVScoreInfoDiploid& diploidInfo(scoreInfo.diploid);
+
+                bool isWriteDiploid(false);
+                if (isMJDiploidEvent)
+                {
+                    isWriteDiploid = isMJEventWriteDiploid;
+                }
+                else
+                {
+                    isWriteDiploid = (modelScoreInfo.diploid.altScore >= opt.diploidOpt.minOutputAltScore);
+                }
+
+                if (opt.isRNA) isWriteDiploid = true; /// TODO remove after adding RNA scoring
+
+                if (isWriteDiploid)
+                {
+                    diploidWriter.writeSV(svData, assemblyData, sv, svId, baseInfo, diploidInfo, diploidEvent, modelScoreInfo.diploid);
+                }
+            }
+
+            if (isSomatic)
+            {
+                const SVModelScoreInfo& scoreInfo(isMJEvent ? mjJointModelScoreInfo : modelScoreInfo);
+                const SVScoreInfoSomatic& somaticInfo(scoreInfo.somatic);
+
+                bool isWriteSomatic(false);
+                if (isMJEvent)
+                {
+                    isWriteSomatic = isMJEventWriteSomatic;
+                }
+                else
+                {
+                    isWriteSomatic = (modelScoreInfo.somatic.somaticScore >= opt.somaticOpt.minOutputSomaticScore);
+                }
+
+                if (isWriteSomatic)
+                {
+                    somWriter.writeSV(svData, assemblyData, sv, svId, baseInfo, somaticInfo, event, modelScoreInfo.somatic);
+                }
+            }
+        }
+    }
+}
+
+
+
+SVCandidateProcessor::
+SVCandidateProcessor(
+    const GSCOptions& opt,
+    const SVLocusScanner& readScanner,  // only forwarded to the _svWriter constructor
+    const char* progName,  // only forwarded to the _svWriter constructor
+    const char* progVersion,  // only forwarded to the _svWriter constructor
+    const SVLocusSet& cset,
+    EdgeRuntimeTracker& edgeTracker,
+    GSCEdgeStatsManager& edgeStatMan) :
+    _opt(opt),
+    _cset(cset),
+    _edgeTracker(edgeTracker),
+    _edgeStatMan(edgeStatMan),
+    _svRefine(opt, cset.header, cset.getCounts(), _edgeTracker),  // NOTE(review): reads the member _edgeTracker, so it must be declared before _svRefine in the class -- confirm
+    _svWriter(opt, readScanner, cset, progName, progVersion)
+{}  // all work happens in the member initializers above
+
+
+
+/// \brief run evaluateCandidate() on every multi-junction candidate in \p mjSVs, all belonging to \p edge
+void
+SVCandidateProcessor::
+evaluateCandidates(
+    const EdgeInfo& edge,
+    const std::vector<SVMultiJunctionCandidate>& mjSVs,
+    const SVCandidateSetData& svData,
+    SupportSamples& svSupports)
+{
+    // the large insertion search is enabled only for an isolated edge on which
+    // every junction of every candidate is a complex SV:
+    bool isFindLargeInsertions(testIsolatedEdge(_cset,edge));
+    if (isFindLargeInsertions)
+    {
+        for (const SVMultiJunctionCandidate& mjSV : mjSVs)
+        {
+            for (const SVCandidate& junctionSV : mjSV.junction)
+            {
+                if (isComplexSV(junctionSV)) continue;
+                isFindLargeInsertions=false;
+            }
+        }
+    }
+
+    // reset the refiner's edge-level data before working through this edge's candidates:
+    _svRefine.clearEdgeData();
+    for (const SVMultiJunctionCandidate& mjSV : mjSVs) evaluateCandidate(edge,mjSV,svData,isFindLargeInsertions, svSupports);
+}
+
+
+
+void
+SVCandidateProcessor::
+evaluateCandidate(
+    const EdgeInfo& edge,
+    const SVMultiJunctionCandidate& mjCandidateSV,
+    const SVCandidateSetData& svData,
+    const bool isFindLargeInsertions,  // forwarded unchanged to _svRefine.getCandidateAssemblyData()
+    SupportSamples& svSupports)  // forwarded unchanged to _svWriter.writeSV()
+{
+    assert(! mjCandidateSV.junction.empty());
+    // Overview: (1) assemble every junction unless opt.isSkipAssembly, (2) step through the per-junction assembly results, (3) pass each resulting candidate to _svWriter.writeSV()
+    const unsigned junctionCount(mjCandidateSV.junction.size());
+
+    if (_opt.isVerbose)
+    {
+        log_os << __FUNCTION__ << ": Starting analysis for SV candidate containing " << junctionCount << " junctions. Low-resolution junction candidate ids:";
+        for (const SVCandidate& sv : mjCandidateSV.junction)
+        {
+            log_os << " " << sv.candidateIndex;
+        }
+        log_os << "\n";
+    }
+#ifdef DEBUG_GSV
+    log_os << __FUNCTION__ << ": CandidateSV: " << mjCandidateSV << "\n";
+#endif
+
+    // report this candidate to the runtime tracker and to the edge statistics manager:
+    const bool isComplex(isComplexSV(mjCandidateSV));
+    _edgeTracker.addCand(isComplex);
+
+    _edgeStatMan.updateJunctionCandidates(edge, junctionCount, isComplex);
+
+    // assemble each junction independently:
+    bool isAnySmallAssembler(false);  // set below when a junction has assembled SVs but assemblyData.isSpanning is false
+    std::vector<SVCandidateAssemblyData> mjAssemblyData(junctionCount);
+
+    if (! _opt.isSkipAssembly)
+    {
+        const TimeScoper assmTime(_edgeTracker.assmTime);  // NOTE(review): presumably adds this scope's run time to _edgeTracker.assmTime -- confirm
+        for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+        {
+            const SVCandidate& candidateSV(mjCandidateSV.junction[junctionIndex]);
+            SVCandidateAssemblyData& assemblyData(mjAssemblyData[junctionIndex]);
+            _svRefine.getCandidateAssemblyData(candidateSV, svData, _opt.isRNA, isFindLargeInsertions, assemblyData);
+
+            if (_opt.isVerbose)
+            {
+                log_os << __FUNCTION__ << ": Candidate assembly complete for junction " << junctionIndex << "/" << junctionCount << ". Assembled candidate count: " << assemblyData.svs.size() << "\n";
+            }
+
+            if (! assemblyData.svs.empty())
+            {
+                const unsigned assemblyCount(assemblyData.svs.size());
+
+                const bool isSpanning(assemblyData.isSpanning);
+
+                _edgeStatMan.updateAssemblyCount(edge, assemblyCount, isSpanning);
+
+                if (isSpanning)
+                {
+                    // can't be multi-junction and multi-assembly at the same time:
+                    assert(! ((junctionCount>1) && (assemblyCount>1)));
+                }
+                else
+                {
+                    isAnySmallAssembler=true;
+                }
+
+                // fill in assembly tracking data:
+                _edgeTracker.addAssm(isComplex);
+            }
+            else  // no assembled SVs for this junction: an assembly count of zero is recorded
+            {
+                _edgeStatMan.updateAssemblyCount(edge, 0, assemblyData.isSpanning, assemblyData.isOverlapSkip);
+            }
+        }
+    }
+
+    SVMultiJunctionCandidate mjAssembledCandidateSV;
+    mjAssembledCandidateSV.junction.resize(junctionCount);
+    std::vector<bool> isJunctionFiltered(junctionCount,false);
+
+    std::vector<unsigned> junctionTracker(junctionCount,0);  // per junction: index of the next assemblyData.svs entry to use
+    while (true)
+    {
+        // Note this loop is an accident -- it was intended to enumerate all assembly
+        // combinations for multiple junctions with multiple assemblies each.
+        // It doesn't do that -- but the broken thing it does, in fact, do, is what we
+        // want for the isAnySmallAssembler case so it's well enough for now.
+        // What it does: each pass takes the next unused entry from every junction, and the loop ends on the first pass in which no junction supplies one.
+        bool isWrite(false);
+        for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+        {
+            const SVCandidateAssemblyData& assemblyData(mjAssemblyData[junctionIndex]);
+            unsigned& assemblyIndex(junctionTracker[junctionIndex]);
+
+            if (assemblyData.svs.empty())
+            {
+                if (assemblyIndex != 0) continue;  // the low-res junction is used on the first pass only
+#ifdef DEBUG_GSV
+                log_os << __FUNCTION__ << ": score and output low-res candidate junction " << junctionIndex << "\n";
+#endif
+                mjAssembledCandidateSV.junction[junctionIndex] = mjCandidateSV.junction[junctionIndex];
+            }
+            else
+            {
+                if (assemblyIndex >= assemblyData.svs.size()) continue;  // exhausted: the junction keeps the value assigned on an earlier pass
+                const SVCandidate& assembledSV(assemblyData.svs[assemblyIndex]);
+#ifdef DEBUG_GSV
+                log_os << __FUNCTION__ << ": score and output assembly candidate junction " << junctionIndex << ": " << assembledSV << "\n";
+#endif
+                mjAssembledCandidateSV.junction[junctionIndex] = assembledSV;
+            }
+            assemblyIndex++;
+            isWrite = true;
+        }
+        if (! isWrite) break;
+
+
+        // if any junctions go into the small assembler (for instance b/c the breakends are too close), then
+        // treat all junctions as independent:
+        //
+        const TimeScoper scoreTime(_edgeTracker.scoreTime);
+        if ((junctionCount>1) && isAnySmallAssembler)
+        {
+            for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+            {
+                std::vector<bool> isJunctionFilteredHack(junctionCount,true);  // every junction except junctionIndex is passed in as filtered,
+                isJunctionFilteredHack[junctionIndex] = false;  // so each writeSV() call below handles a single junction
+                _svWriter.writeSV(edge, svData, mjAssemblyData, mjAssembledCandidateSV,
+                                  isJunctionFilteredHack, svSupports);
+            }
+        }
+        else
+        {
+            _svWriter.writeSV(edge, svData, mjAssemblyData, mjAssembledCandidateSV,
+                              isJunctionFiltered, svSupports);
+        }
+    }
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVCandidateProcessor.hh b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateProcessor.hh
new file mode 100644
index 0000000..a481ed2
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVCandidateProcessor.hh
@@ -0,0 +1,124 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeRuntimeTracker.hh"
+#include "GSCEdgeStatsManager.hh"
+#include "GSCOptions.hh"
+#include "SVCandidateAssemblyRefiner.hh"
+#include "SVScorer.hh"
+
+#include "common/OutStream.hh"
+#include "manta/JunctionIdGenerator.hh"
+#include "manta/SVCandidateAssemblyData.hh"
+#include "manta/SVMultiJunctionCandidate.hh"
+#include "format/VcfWriterCandidateSV.hh"
+#include "format/VcfWriterDiploidSV.hh"
+#include "format/VcfWriterSomaticSV.hh"
+#include "format/VcfWriterTumorSV.hh"
+
+
+#include <memory>
+
+//#define DEBUG_GSV
+
+/// Groups the SV scorer, four output streams and four VCF writers (candidate, diploid,
+/// somatic, tumor) together with the single writeSV() entry point declared here.
+struct SVWriter
+{
+    SVWriter(
+        const GSCOptions& initOpt,
+        const SVLocusScanner& readScanner,
+        const SVLocusSet& cset,
+        const char* progName,
+        const char* progVersion);
+
+    void
+    writeSV(
+        const EdgeInfo& edge,
+        const SVCandidateSetData& svData,
+        const std::vector<SVCandidateAssemblyData>& assemblyData,
+        const SVMultiJunctionCandidate& mjSV,
+        const std::vector<bool>& isInputJunctionFiltered,
+        SupportSamples& svSupports);
+
+    ///////////////////////// data:
+    const GSCOptions& opt; ///< stored as a reference, so the options object must outlive this writer
+    const bool isSomatic;
+    const bool isTumorOnly;
+
+    SVScorer svScore;
+
+    std::vector<SVModelScoreInfo> mjModelScoreInfo;
+    // NOTE(review): the four streams presumably pair up with the four writers below (cand/dip/som/tum) -- confirm in the .cpp
+    OutStream candfs;
+    OutStream dipfs;
+    OutStream somfs;
+    OutStream tumfs;
+
+    VcfWriterCandidateSV candWriter;
+    VcfWriterDiploidSV diploidWriter;
+    VcfWriterSomaticSV somWriter;
+    VcfWriterTumorSV tumorWriter;
+
+    JunctionIdGenerator _idgen;
+};
+
+/// Evaluates multi-junction SV candidates for an edge; holds its own assembly refiner and SVWriter by value, everything else by reference.
+struct SVCandidateProcessor
+{
+    SVCandidateProcessor(
+        const GSCOptions& opt,
+        const SVLocusScanner& readScanner,
+        const char* progName,
+        const char* progVersion,
+        const SVLocusSet& cset,
+        EdgeRuntimeTracker& edgeTracker,
+        GSCEdgeStatsManager& _edgeStatMan); // NOTE(review): parameter uses the member-style leading underscore, unlike its siblings
+
+    void
+    evaluateCandidates(
+        const EdgeInfo& edge,
+        const std::vector<SVMultiJunctionCandidate>& mjSVs,
+        const SVCandidateSetData& svData,
+        SupportSamples& svSupports);
+
+private:
+
+    void
+    evaluateCandidate(
+        const EdgeInfo& edge,
+        const SVMultiJunctionCandidate& mjCandidateSV,
+        const SVCandidateSetData& svData,
+        const bool isFindLargeInsertions,
+        SupportSamples& svSupports);
+
+    const GSCOptions& _opt;
+    const SVLocusSet& _cset;
+    EdgeRuntimeTracker& _edgeTracker;
+    GSCEdgeStatsManager& _edgeStatMan;
+    SVCandidateAssemblyRefiner _svRefine;
+    SVWriter _svWriter;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVEvidence.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVEvidence.cpp
new file mode 100644
index 0000000..0c0d647
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVEvidence.cpp
@@ -0,0 +1,102 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#include "SVEvidence.hh"
+
+#include <iostream>
+
+
+/// Debug dump of the split-read evidence of one read at one breakend: "label: value" pairs on a single line, no trailing newline.
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVFragmentEvidenceAlleleBreakendPerRead& svbpr)
+{
+    os << "isEval: " << svbpr.isSplitEvaluated
+       << " isSplitSupport: " << svbpr.isSplitSupport
+       << " isTier2SplitSupport: " << svbpr.isTier2SplitSupport
+       << " splitEvidence: " << svbpr.splitEvidence
+       << " splitLnLhood: " << svbpr.splitLnLhood
+       ;
+
+    return os;
+}
+
+
+
+/// Debug dump of one breakend: fragment-level support on the first line, then the read1 and read2 evidence, each newline-terminated.
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVFragmentEvidenceAlleleBreakend& svbp)
+{
+    os << "isFrag: " << svbp.isFragmentSupport << " fragProb: " << svbp.fragLengthProb << "\n";
+    os << "read1ev: " << svbp.read1 << "\n";
+    os << "read2ev: " << svbp.read2 << "\n";
+    return os;
+}
+
+
+/// Debug dump of both breakends of one allele; no newline is added here because the breakend printer already ends with one.
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVFragmentEvidenceAllele& sval)
+{
+    os << "----BP1: " << sval.bp1;
+    os << "----BP2: " << sval.bp2;
+    return os;
+}
+
+
+/// Debug dump of the per-read summary (scanned, anchored at both tiers, shadow, mapq, size) on a single line, no trailing newline.
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVFragmentEvidenceRead& svr)
+{
+    os << "readinfo isScanned: " << svr.isScanned
+       << " isAnchored: " << svr.isAnchored(false)
+       << " isTier2Anchored: " << svr.isAnchored(true)
+       << " isShadow: " << svr.isShadow
+       << " mapq: " << svr.mapq << " size: " << svr.size;
+    return os;
+}
+
+
+/// Debug dump of a whole fragment record between FRAGMENT_START / FRAGMENT_END markers: read1, read2, then ALT and REF allele evidence.
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVFragmentEvidence& sve)
+{
+    os << "FRAGMENT_START\n"
+       << "read1: " << sve.read1 << "\n"
+       << "read2: " << sve.read2 << "\n"
+       << "+++++++++++ALT\n" << sve.alt
+       << "+++++++++++REF\n" << sve.ref
+       << "FRAGMENT_END\n";
+
+    return os;
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVEvidence.hh b/src/c++/lib/applications/GenerateSVCandidates/SVEvidence.hh
new file mode 100644
index 0000000..84d990e
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVEvidence.hh
@@ -0,0 +1,328 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include <cassert>
+
+#include <iosfwd>
+#include <map>
+#include <string>
+#include <vector>
+
+
+///
+/// \brief classes required to build the SVEvidence object
+///
+/// Note that the SVEvidence object is a kind of mini-database, accumulating all information about the
+/// relationship of each fragment with a specific SV-candidate. This allows us to tie together:
+///
+/// (1) spanning read information from the full fragment
+/// (2) split read information from read1
+/// (3) split read information from read2
+/// (4) fragment mapping properties from read1 & read2
+///
+/// ...for each fragment. The object stores this per-fragment information for all fragments impacting
+/// all alleles at a given locus (But note at present we limit the alternate alleles to one).
+///
+/// The class structure below represents kind of a crummy db schema -- there's probably a better way to
+/// do this?? Open to suggestions...
+///
+
+
+
+
+/// track all support data from an individual read in a fragment specific to an individual breakend of a single allele
+/// (every member is default-initialized to false / 0, i.e. "not evaluated, no support")
+struct SVFragmentEvidenceAlleleBreakendPerRead
+{
+    bool isSplitEvaluated = false; ///< have we checked this read for split support of this bp?
+    bool isSplitSupport = false; ///< if evaluated, does this read support this allele in the bp?
+    bool isTier2SplitSupport = false; ///< if evaluated, does this read support this allele in the bp by permissive criteria?
+    float splitEvidence = 0; ///< if evaluated, what is the evidence score?
+    float splitLnLhood = 0; ///< ln likelihood of best split alignment
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVFragmentEvidenceAlleleBreakendPerRead& svbpr); // defined in SVEvidence.cpp
+
+
+/// track all support data from an individual fragment specific to an individual breakend of a single allele
+/// (fragment-level pair support plus one per-read split evidence record each for read1 and read2)
+struct SVFragmentEvidenceAlleleBreakend
+{
+    SVFragmentEvidenceAlleleBreakendPerRead&
+    getRead(const bool isRead1)
+    {
+        return (isRead1 ? read1 : read2);
+    }
+
+    const SVFragmentEvidenceAlleleBreakendPerRead&
+    getRead(const bool isRead1) const
+    {
+        return (isRead1 ? read1 : read2);
+    }
+
+    void
+    clearPairSupport()
+    {
+        isFragmentSupport=false; // only the fragment-level fields are reset; the read1/read2 split evidence is kept
+        fragLengthProb = 0;
+    }
+
+    bool isFragmentSupport = false; ///< if true, paired-read analysis shows that this read pair fragment supports this allele on this breakend
+    float fragLengthProb = 0; ///< if isFragmentSupport, what is the prob of the fragment size given this allele?
+    SVFragmentEvidenceAlleleBreakendPerRead read1; // read1 specific evidence
+    SVFragmentEvidenceAlleleBreakendPerRead read2; // read2 specific evidence
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVFragmentEvidenceAlleleBreakend& svbp); // defined in SVEvidence.cpp
+
+
+/// track all support data from an individual fragment specific to a single allele of an SV candidate
+/// (the std::pair results below are ordered as: first = bp1, second = bp2)
+struct SVFragmentEvidenceAllele
+{
+    SVFragmentEvidenceAlleleBreakend&
+    getBp(const bool isBp1) // NOTE(review): mutable access only -- there is no const overload
+    {
+        return (isBp1 ? bp1 : bp2 );
+    }
+
+    std::pair<bool,bool>
+    isAnySplitReadSupport(
+        const bool isRead1) const
+    {
+        return std::make_pair(
+                   bp1.getRead(isRead1).isSplitSupport,
+                   bp2.getRead(isRead1).isSplitSupport);
+    }
+
+    std::pair<bool,bool>
+    isAnyTier2SplitReadSupport(
+        const bool isRead1) const
+    {
+        return std::make_pair(
+                   bp1.getRead(isRead1).isTier2SplitSupport,
+                   bp2.getRead(isRead1).isTier2SplitSupport);
+    }
+
+    void
+    clearPairSupport()
+    {
+        bp1.clearPairSupport(); // forwards to both breakends; split-read evidence is not touched
+        bp2.clearPairSupport();
+    }
+
+    SVFragmentEvidenceAlleleBreakend bp1;
+    SVFragmentEvidenceAlleleBreakend bp2;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVFragmentEvidenceAllele& sval); // defined in SVEvidence.cpp
+
+
+
+/// store properties of the reads in a fragment which are not tightly coupled to any one allele/bp, etc....
+/// (the two anchor flags are private and only reachable through the accessors/setters below)
+struct SVFragmentEvidenceRead
+{
+    /// TODO set anchor policy wrt shadow state!!!
+    bool
+    isAnchored(
+        const bool isTier2) const
+    {
+        return (isTier2 ? _isTier2Anchored : _isAnchored);
+    }
+
+    bool
+    isObservedAnchor(
+        const bool isTier2) const
+    {
+        return (isScanned && isAnchored(isTier2)); // an anchor flag only counts once the read has been scanned
+    }
+
+    void
+    setAnchored(
+        const bool val)
+    {
+        _isAnchored=val;
+    }
+
+    void
+    setTier2Anchored(
+        const bool val)
+    {
+        _isTier2Anchored=val;
+    }
+
+    bool isScanned = false; ///< if true, this read's bam record has been scanned to fill in the remaining values in this object
+    bool isShadow = false; ///< read was originally unmapped but had a mapped mate read, mapq is MAPQ of the mate in this case
+
+    unsigned mapq = 0; ///< mapping quality (the mate's MAPQ when isShadow is set, see above)
+    unsigned size = 0; ///< presumably the read length -- TODO confirm where this is filled in
+
+private:
+    bool _isAnchored = false; ///< if true, the read is found and known to have a confident mapping wrt fragment support
+    bool _isTier2Anchored = false; ///< if true, the read is found and known to have a confident mapping wrt fragment support at tier2
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVFragmentEvidenceRead& svr); // defined in SVEvidence.cpp
+
+
+/// track all support data from an individual fragment specific to an SV hypothesis
+///
+/// this is both to prevent double-counting of evidence and to consolidate different
+/// sources of information (paired-split, etc).
+/// All std::pair results below are ordered as: first = bp1, second = bp2.
+struct SVFragmentEvidence
+{
+    SVFragmentEvidenceRead&
+    getRead(const bool isRead1)
+    {
+        return (isRead1 ? read1 : read2);
+    }
+
+    const SVFragmentEvidenceRead&
+    getRead(const bool isRead1) const
+    {
+        return (isRead1 ? read1 : read2);
+    }
+
+    /// does this fragment provide any pair evidence for any allele/bp combination?
+    bool
+    isAnySpanningPairSupport() const
+    {
+        const bool isRefSupport(ref.bp1.isFragmentSupport || ref.bp2.isFragmentSupport);
+        const bool isAltSupport(alt.bp1.isFragmentSupport || alt.bp2.isFragmentSupport);
+
+        return (isRefSupport || isAltSupport);
+    }
+
+    /// does this fragment provide any pair evidence for any bp of the ALT allele?
+    bool
+    isAltSpanningPairSupport() const
+    {
+        const bool isAltSupport(alt.bp1.isFragmentSupport || alt.bp2.isFragmentSupport);
+
+        return (isAltSupport);
+    }
+
+    /// per breakend: does the selected read of this fragment provide split evidence for either allele (REF or ALT)?
+    std::pair<bool,bool>
+    isAnySplitReadSupport(
+        const bool isRead1) const
+    {
+        const std::pair<bool,bool> isAlt(alt.isAnySplitReadSupport(isRead1));
+        const std::pair<bool,bool> isRef(ref.isAnySplitReadSupport(isRead1));
+
+        return std::make_pair((isAlt.first || isRef.first), (isAlt.second || isRef.second));
+    }
+
+    /// does the selected read of this fragment provide any split evidence for any bp of the ALT allele?
+    bool
+    isAltSplitReadSupport(
+        const bool isRead1) const
+    {
+        const std::pair<bool,bool> isAlt(alt.isAnySplitReadSupport(isRead1));
+
+        return (isAlt.first || isAlt.second);
+    }
+
+    /// per breakend: does the selected read of this fragment provide tier2 split evidence for either allele (REF or ALT)?
+    std::pair<bool,bool>
+    isAnyTier2SplitReadSupport(
+        const bool isRead1) const
+    {
+        const std::pair<bool,bool> isAlt(alt.isAnyTier2SplitReadSupport(isRead1));
+        const std::pair<bool,bool> isRef(ref.isAnyTier2SplitReadSupport(isRead1));
+
+        return std::make_pair((isAlt.first || isRef.first), (isAlt.second || isRef.second));
+    }
+
+    /// does the selected read of this fragment provide any tier2 split evidence for any bp of the ALT allele?
+    bool
+    isAltTier2SplitReadSupport(
+        const bool isRead1) const
+    {
+        const std::pair<bool,bool> isAlt(alt.isAnyTier2SplitReadSupport(isRead1));
+
+        return (isAlt.first || isAlt.second);
+    }
+
+    /// reset the fragment-level (paired-read) support of both alleles; split-read evidence is left as is
+    void
+    clearPairSupport()
+    {
+        ref.clearPairSupport();
+        alt.clearPairSupport();
+    }
+
+    SVFragmentEvidenceRead read1;
+    SVFragmentEvidenceRead read2;
+
+    SVFragmentEvidenceAllele alt;
+    SVFragmentEvidenceAllele ref;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVFragmentEvidence& sve); // defined in SVEvidence.cpp
+
+
+/// track all support data for an SV hypothesis
+///
+/// Note how this object is different than SVScoreInfoSomatic -- it is highly detailed and meant to be processed
+/// to create summary statistics and scores later. Those scores and summary statistics should go into objects like
+/// SVScoreInfoSomatic to be written out in whichever output format is selected.
+/// Storage is one evidenceTrack_t per sample index; out-of-range indices are caught by assert() only.
+struct SVEvidence
+{
+    typedef std::map<std::string,SVFragmentEvidence> evidenceTrack_t; // key is presumably the fragment's read name -- TODO confirm at the insertion site
+
+    unsigned
+    size() const
+    {
+        return samples.size(); // NOTE(review): vector size_type is implicitly narrowed to unsigned here
+    }
+
+    evidenceTrack_t&
+    getSampleEvidence(
+        const unsigned index)
+    {
+        assert(index < size());
+        return samples[index];
+    }
+
+    const evidenceTrack_t&
+    getSampleEvidence(
+        const unsigned index) const
+    {
+        assert(index < size());
+        return samples[index];
+    }
+
+    std::vector<evidenceTrack_t> samples;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVFinder.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVFinder.cpp
new file mode 100644
index 0000000..6569e50
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVFinder.cpp
@@ -0,0 +1,1360 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SVFinder.hh"
+
+#include "blt_util/binomial_test.hh"
+#include "blt_util/log.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/bam_streamer.hh"
+#include "manta/ReadGroupStatsSet.hh"
+#include "manta/SVCandidateUtil.hh"
+#include "manta/SVReferenceUtil.hh"
+#include "svgraph/EdgeInfoUtil.hh"
+
+#include <iostream>
+
+
+//#define DEBUG_SVDATA
+
+#ifdef DEBUG_SVDATA
+#include "blt_util/log.hh"
+#endif
+
+
+/// (anom + split evidence counts + pseudoSpan) / (total input count + pseudoTotal) for one sample; with no data this is 10/1000.
+static
+double
+getSpanningNoiseRate(
+    const AllCounts& counts,
+    const unsigned sampleIndex)
+{
+    static const double pseudoTotal(1000.);
+    static const double pseudoSpan(10.);
+
+    const SampleReadInputCounts& input(counts.getSampleCounts(sampleIndex).input);
+    return ((input.evidenceCount.anom+input.evidenceCount.split)+pseudoSpan)/(input.total()+pseudoTotal);
+}
+
+
+/// (assm evidence count + pseudoAssm) / (total input count + pseudoTotal) for one sample; with no data this is 10/1000.
+static
+double
+getAssemblyNoiseRate(
+    const AllCounts& counts,
+    const unsigned sampleIndex)
+{
+    static const double pseudoTotal(1000.);
+    static const double pseudoAssm(10.);
+
+    const SampleReadInputCounts& input(counts.getSampleCounts(sampleIndex).input);
+    return (input.evidenceCount.assm+pseudoAssm)/(input.total()+pseudoTotal);
+}
+
+
+/// Loads the locus set and depth filter, opens one bam_streamer per alignment file, detects a somatic run and derives the max-over-samples noise rates.
+SVFinder::
+SVFinder(
+    const GSCOptions& opt,
+    const SVLocusScanner& readScanner,
+    EdgeRuntimeTracker& edgeTracker,
+    GSCEdgeStatsManager& edgeStatMan) :
+    _scanOpt(opt.scanOpt),
+    _isAlignmentTumor(opt.alignFileOpt.isAlignmentTumor),
+    _readScanner(readScanner),
+    _referenceFilename(opt.referenceFilename),
+    _isRNA(opt.isRNA),
+    _isVerbose(opt.isVerbose),
+    _isSomatic(false), // switched to true below as soon as any alignment file is flagged as tumor
+    _edgeTracker(edgeTracker),
+    _edgeStatMan(edgeStatMan)
+{
+    // load in set:
+    _set.load(opt.graphFilename.c_str(),true);
+
+    _dFilterPtr.reset(new ChromDepthFilterUtil(opt.chromDepthFilename,_scanOpt.maxDepthFactor,_set.header));
+
+    // setup regionless bam_streams:
+    // setup all data for main analysis loop:
+    for (const std::string& afile : opt.alignFileOpt.alignmentFilename)
+    {
+        // avoid creating shared_ptr temporaries:
+        streamPtr tmp(new bam_streamer(afile.c_str()));
+        _bamStreams.push_back(tmp);
+    }
+
+
+    const unsigned bamCount(_bamStreams.size());
+    {
+        // assert expected bam order of all normal samples followed by all tumor samples,
+        // also, determine if this is a somatic run:
+        bool isFirstTumor(false);
+        for (unsigned bamIndex(0); bamIndex<bamCount; ++bamIndex)
+        {
+            const bool isTumor(_isAlignmentTumor[bamIndex]); // NOTE(review): assumes isAlignmentTumor has one entry per alignment file
+
+            if (isTumor)
+            {
+                isFirstTumor=true;
+                _isSomatic=true;
+            }
+            assert((! isFirstTumor) || isTumor); // once a tumor sample has been seen, every later sample must be tumor too
+        }
+    }
+
+    const AllCounts& counts(getSet().getCounts());
+    for (unsigned bamIndex(0); bamIndex<bamCount; ++bamIndex)
+    {
+        // take max rate over all samples:
+        auto updateRate = [](double& x, const double val, const bool isFirst)
+        {
+            if (isFirst) x=val;
+            else x=std::max(x,val);
+        };
+
+        const bool isFirst(bamIndex==0); // the first sample overwrites whatever the rate members held before
+        updateRate(_spanningNoiseRate,getSpanningNoiseRate(counts,bamIndex), isFirst);
+        updateRate(_assemblyNoiseRate,getAssemblyNoiseRate(counts,bamIndex), isFirst);
+    }
+}
+
+
+
+// the dtor is kept explicit and out-of-line in this .cpp so that unique_ptr members work reliably (presumably because their pointee types are only complete here -- confirm against SVFinder.hh):
+SVFinder::
+~SVFinder()
+{}
+
+
+/// test if read supports an SV on this edge (passes the mapq filters, is SV evidence, and yields a locus intersecting both the local and remote node), if so, add to SVData
+static
+void
+addSVNodeRead(
+    const bam_header_info& bamHeader,
+    const SVLocusScanner& scanner,
+    const SVLocusNode& localNode,
+    const SVLocusNode& remoteNode,
+    const bam_record& bamRead,
+    const unsigned bamIndex,
+    const bool isExpectRepeat,
+    const reference_contig_segment& refSeq,
+    const bool isNode1,
+    const bool isGatherSubmapped,
+    SVCandidateSetSequenceFragmentSampleGroup& svDataGroup,
+    SampleEvidenceCounts& eCounts)
+{
+    using namespace illumina::common;
+
+    if (scanner.isMappedReadFilteredCore(bamRead)) return;
+
+    if (bamRead.map_qual() < scanner.getMinTier2MapQ()) return; // below even the tier2 mapq threshold
+
+    const bool isSubMapped(bamRead.map_qual() < scanner.getMinMapQ());
+    if ((!isGatherSubmapped) && isSubMapped) return;
+
+    svDataGroup.increment(isNode1,isSubMapped); // tallied before the SV-evidence test, so every read passing the mapq filters is counted
+
+    if (! scanner.isSVEvidence(bamRead, bamIndex, refSeq)) return;
+
+    // finally, check to see if the svDataGroup is full... for now, we allow a very large
+    // number of reads to be stored in the hope that we never reach this limit, but just in
+    // case we don't want to exhaust memory in centromere pileups, etc...
+    //
+    // Once svDataGroup is full, we keep scanning but only to find pairs for reads that
+    // have already been entered.
+    // NOTE(review): nothing below returns early when full, so that policy is presumably enforced inside svDataGroup.add()
+    static const unsigned maxDataSize(4000);
+    if ((! svDataGroup.isFull()) && (svDataGroup.size() >= maxDataSize))
+    {
+        svDataGroup.setFull();
+    }
+
+    //
+    // run an initial screen to make sure at least one candidate from this read matches the regions for this edge:
+    //
+    typedef std::vector<SVLocus> loci_t;
+    loci_t loci;
+    scanner.getSVLoci(bamRead, bamIndex, bamHeader, refSeq, loci,
+                      eCounts);
+
+    for (const SVLocus& locus : loci)
+    {
+        const unsigned locusSize(locus.size());
+        assert((locusSize>=1) && (locusSize<=2));
+
+        unsigned readLocalIndex(0);
+        if (locusSize == 2)
+        {
+            unsigned readRemoteIndex(1);
+            if (! locus.getNode(readLocalIndex).isOutCount()) // the node with out-counts is treated as the read's local node
+            {
+                std::swap(readLocalIndex,readRemoteIndex);
+            }
+
+            if (! locus.getNode(readLocalIndex).isOutCount())
+            {
+                std::ostringstream oss;
+                oss << "Unexpected svlocus counts from bam record: " << bamRead << "\n"
+                    << "\tlocus: " << locus << "\n";
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+            if (! locus.getNode(readRemoteIndex).getInterval().isIntersect(remoteNode.getInterval())) continue; //todo should this intersect be checked in swapped orientation?
+        }
+        else
+        {
+            if (! locus.getNode(readLocalIndex).getInterval().isIntersect(remoteNode.getInterval())) continue; // single-node locus: the same node must intersect both the remote and (below) the local interval
+        }
+
+        if (! locus.getNode(readLocalIndex).getInterval().isIntersect(localNode.getInterval())) continue; //todo should this intersect be checked in swapped orientation?
+
+        svDataGroup.add(bamRead, isExpectRepeat, isNode1, isSubMapped);
+
+        // once any locus has met the local/remote overlap criteria, there's no reason to keep scanning loci
+        // of the same bam record:
+        break;
+    }
+}
+
+
+/// Outputs the node's search interval (node interval merged with its evidence range) and the reference segment covering it plus an edge buffer.
+static
+void
+getNodeRefSeq(
+    const bam_header_info& bamHeader,
+    const SVLocus& locus,
+    const NodeIndexType localNodeIndex,
+    const std::string& referenceFilename,
+    GenomeInterval& searchInterval,
+    reference_contig_segment& refSeq)
+{
+    // get full search interval:
+    const SVLocusNode& localNode(locus.getNode(localNodeIndex));
+    searchInterval = (localNode.getInterval());
+    searchInterval.range.merge_range(localNode.getEvidenceRange());
+
+    // grab the reference for segment we're estimating plus a buffer around the segment edges:
+    static const unsigned refEdgeBufferSize(100);
+    getIntervalReferenceSegment(referenceFilename, bamHeader, refEdgeBufferSize, searchInterval, refSeq);
+}
+
+
+
+/// approximate depth tracking -- don't bother reading the cigar string, just assume a perfect match of
+/// size read_size; only positions inside [beginPos, beginPos+depth.size()) are incremented
+static
+void
+addReadToDepthEst(
+    const bam_record& bamRead,
+    const pos_t beginPos,
+    std::vector<unsigned>& depth)
+{
+    const pos_t endPos(beginPos+depth.size());
+    const pos_t refStart(bamRead.pos()-1); // presumably converts a 1-indexed pos() to zero-indexed -- TODO confirm
+
+    const pos_t readSize(bamRead.read_size());
+    for (pos_t readIndex(std::max(0,(beginPos-refStart))); readIndex<readSize; ++readIndex) // starts past any part of the read lying before beginPos
+    {
+        const pos_t refPos(refStart+readIndex);
+        if (refPos>=endPos) return; // the rest of the read lies beyond the tracked window
+        const pos_t depthIndex(refPos-beginPos);
+        assert(depthIndex>=0);
+
+        depth[depthIndex]++;
+    }
+}
+
+
+/// Scans every bam stream over searchInterval and hands each read that survives the optional max-depth filter to addSVNodeRead().
+void
+SVFinder::
+addSVNodeData(
+    const bam_header_info& bamHeader,
+    const SVLocus& locus,
+    const NodeIndexType localNodeIndex,
+    const NodeIndexType remoteNodeIndex,
+    const GenomeInterval& searchInterval,
+    const reference_contig_segment& refSeq,
+    const bool isNode1,
+    SVCandidateSetData& svData)
+{
+    // look up the local and remote nodes of the locus:
+    const SVLocusNode& localNode(locus.getNode(localNodeIndex));
+    const SVLocusNode& remoteNode(locus.getNode(remoteNodeIndex));
+
+    bool isExpectRepeat(svData.setNewSearchInterval(searchInterval));
+
+    // This is a temporary measure to make the qname collision detection much looser
+    // problems have come up where very large deletions are present in a read, and it is therefore
+    // detected as a repeat in two different regions, even though they are separated by a considerable
+    // distance. Solution is to temporarily turn off collision detection whenever two regions are on
+    // the same chrom (ie. almost always)
+    //
+    // TODO: restore more precise collision detection
+    if (! isExpectRepeat) isExpectRepeat = (localNode.getInterval().tid == remoteNode.getInterval().tid);
+
+#ifdef DEBUG_SVDATA
+    log_os << __FUNCTION__ << ": bp_interval: " << localNode.getInterval()
+           << " evidenceInterval: " << localNode.getEvidenceRange()
+           << " searchInterval: " << searchInterval
+           << " isExpectRepeat: " << isExpectRepeat
+           << "\n";
+#endif
+
+    const bool isMaxDepth(dFilter().isMaxDepthFilter());
+    float maxDepth(0);
+    if (isMaxDepth)
+    {
+        maxDepth = dFilter().maxDepth(searchInterval.tid);
+    }
+    const pos_t searchBeginPos(searchInterval.range.begin_pos());
+    const pos_t searchEndPos(searchInterval.range.end_pos());
+    std::vector<unsigned> normalDepthBuffer(searchInterval.range.size(),0); // shared by all bam streams; only non-tumor reads add to it (see below)
+
+    // iterate through reads, test reads for association and add to svData:
+    unsigned bamIndex(0);
+    for (streamPtr& bamPtr : _bamStreams)
+    {
+        const bool isTumor(_isAlignmentTumor[bamIndex]);
+
+        const bool isGatherSubmapped(_isSomatic && (! isTumor)); // reads below the main mapq threshold are gathered only from non-tumor samples of a somatic run
+
+        SVCandidateSetSequenceFragmentSampleGroup& svDataGroup(svData.getDataGroup(bamIndex));
+        bam_streamer& readStream(*bamPtr);
+
+        // set bam stream to new search interval:
+        readStream.set_new_region(searchInterval.tid,searchInterval.range.begin_pos(),searchInterval.range.end_pos());
+
+#ifdef DEBUG_SVDATA
+        log_os << __FUNCTION__ << ": scanning bamIndex: " << bamIndex << "\n";
+#endif
+        while (readStream.next())
+        {
+            const bam_record& bamRead(*(readStream.get_record_ptr()));
+
+            const pos_t refPos(bamRead.pos()-1);
+            if (refPos >= searchEndPos) break; // stop at the first read starting at or past the end of the search interval
+
+            if (isMaxDepth)
+            {
+                if (! isTumor)
+                {
+                    // depth estimation relies on a simple filtration criterion to stay in sync with the chromosome mean
+                    // depth estimates:
+                    if (! bamRead.is_unmapped())
+                    {
+                        addReadToDepthEst(bamRead, searchBeginPos, normalDepthBuffer);
+                    }
+                }
+
+                assert(refPos<searchEndPos);
+                const pos_t depthOffset(refPos - searchBeginPos); // negative when the read starts before the search interval, in which case no depth test is applied
+                if ((depthOffset>=0) && (normalDepthBuffer[depthOffset] > maxDepth)) continue;
+            }
+
+            // test if read supports an SV on this edge, if so, add to SVData
+            addSVNodeRead(
+                bamHeader,_readScanner, localNode, remoteNode,
+                bamRead, bamIndex, isExpectRepeat, refSeq, isNode1,
+                isGatherSubmapped, svDataGroup, _eCounts);
+        }
+        ++bamIndex;
+    }
+}
+
+
+
+// sanity check the final result: read/pair counts recomputed from svData must match those stored in each SVCandidate, otherwise a LogicException is thrown
+void
+SVFinder::
+checkResult(
+    const SVCandidateSetData& svData,
+    const std::vector<SVCandidate>& svs) const
+{
+    using namespace illumina::common;
+
+    const unsigned svCount(svs.size());
+    if (0 == svCount) return;
+
+    // check that the counts totaled up from the data match those in the sv candidates
+    std::map<unsigned,unsigned> readCounts;
+    std::map<unsigned,unsigned> pairCounts;
+
+    for (unsigned i(0); i<svCount; ++i)
+    {
+        readCounts[i] = 0;
+        pairCounts[i] = 0;
+    }
+
+    const unsigned bamCount(_bamStreams.size());
+    for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+    {
+        const SVCandidateSetSequenceFragmentSampleGroup& svDataGroup(svData.getDataGroup(bamIndex));
+        for (const SVCandidateSetSequenceFragment& fragment : svDataGroup)
+        {
+            for (const SVSequenceFragmentAssociation& sva : fragment.svLink)
+            {
+                if (sva.index>=svCount)
+                {
+                    std::ostringstream oss;
+                    oss << "Searching for SVIndex: " << sva.index << " with svSize: " << svCount << "\n";
+                    BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+                }
+
+                if (SVEvidenceType::isPairType(sva.evtype)) // only pair-type evidence links are tallied
+                {
+                    if (fragment.read1.isSet()) readCounts[sva.index]++;
+                    if (fragment.read2.isSet()) readCounts[sva.index]++;
+                    if (fragment.read1.isSet() && fragment.read2.isSet()) pairCounts[sva.index] += 2; // +2 because the expected value below sums the bp1 and bp2 pair counts
+                }
+            }
+        }
+    }
+
+    for (unsigned svIndex(0); svIndex<svCount; ++svIndex)
+    {
+        const unsigned svObsReadCount(svs[svIndex].bp1.getLocalPairCount() + svs[svIndex].bp2.getLocalPairCount());
+        const unsigned svObsPairCount(svs[svIndex].bp1.getPairCount() + svs[svIndex].bp2.getPairCount());
+        assert(svs[svIndex].bp1.getPairCount() == svs[svIndex].bp2.getPairCount());
+
+        const unsigned dataObsReadCount(readCounts[svIndex]);
+        const unsigned dataObsPairCount(pairCounts[svIndex]);
+
+        bool isCountException(false);
+        if      (svObsReadCount != dataObsReadCount) isCountException=true;
+        else if (svObsPairCount != dataObsPairCount) isCountException=true;
+
+        if (isCountException)
+        {
+            std::ostringstream oss;
+            oss << "Unexpected difference in sv and data read counts.\n"
+                << "\tSVreadCount: " << svObsReadCount << " DataReadCount: " << dataObsReadCount << "\n"
+                << "\tSVpaircount: " << svObsPairCount << " DataPaircount: " << dataObsPairCount << "\n"
+                << "\tsvIndex: " << svIndex << " SV: " << svs[svIndex];
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+
+        }
+    }
+}
+
+
+
+typedef std::map<unsigned,unsigned> movemap_t;
+
+
+
+/// local convenience struct, if only I had closures instead... :<
+struct svCandDeleter
+{
+    svCandDeleter(
+        std::vector<FatSVCandidate>& svs,
+        movemap_t& moveSVIndex) :
+        _shift(0),
+        _isLastIndex(false),
+        _lastIndex(0),
+        _svs(svs),
+        _moveSVIndex(moveSVIndex)
+    {}
+
+    void
+    deleteIndex(
+        const unsigned index)
+    {
+        assert(index <= _svs.size());
+
+        if (_isLastIndex)
+        {
+            for (unsigned i(_lastIndex+1); i<index; ++i)
+            {
+                assert(_shift>0);
+                assert(i>=_shift);
+
+                _svs[(i-_shift)] = _svs[i];
+                // moveSVIndex has already been set for deleted indices, this sets
+                // the move for non-deleted positions:
+                _moveSVIndex[i] = (i-_shift);
+            }
+        }
+        _lastIndex=index;
+        _isLastIndex=true;
+        _shift++;
+    }
+
+private:
+    unsigned _shift;
+    bool _isLastIndex;
+    unsigned _lastIndex;
+    std::vector<FatSVCandidate>& _svs;
+    movemap_t& _moveSVIndex;
+};
+
+
+
+/// check whether any svs have grown to intersect each other
+///
+/// this is also part of the temp hygen hack, so just make this minimally work:
+///
+static
+void
+consolidateOverlap(
+    const unsigned bamCount,
+    SVCandidateSetData& svData,
+    std::vector<FatSVCandidate>& svs)
+{
+    movemap_t moveSVIndex;
+    std::set<unsigned> deletedSVIndex;
+
+    std::vector<unsigned> innerIndexShift;
+
+    const unsigned svCount(svs.size());
+    for (unsigned outerIndex(1); outerIndex<svCount; ++outerIndex)
+    {
+        const unsigned prevInnerIndexShift( (outerIndex<=1) ? 0 : innerIndexShift[outerIndex-2]);
+        innerIndexShift.push_back(prevInnerIndexShift + deletedSVIndex.count(outerIndex-1));
+        for (unsigned innerIndex(0); innerIndex<outerIndex; ++innerIndex)
+        {
+            if (deletedSVIndex.count(innerIndex)) continue;
+
+            if (svs[innerIndex].isIntersect(svs[outerIndex]))
+            {
+#ifdef DEBUG_SVDATA
+                log_os << __FUNCTION__ << ": Merging outer:inner: " << outerIndex << " " << innerIndex << "\n";
+#endif
+                svs[innerIndex].merge(svs[outerIndex]);
+                assert(innerIndexShift.size() > innerIndex);
+                assert(innerIndexShift[innerIndex] <= innerIndex);
+                moveSVIndex[outerIndex] = (innerIndex - innerIndexShift[innerIndex]);
+                deletedSVIndex.insert(outerIndex);
+                break;
+            }
+        }
+    }
+
+    if (! deletedSVIndex.empty())
+    {
+#ifdef DEBUG_SVDATA
+        for (const unsigned index : deletedSVIndex)
+        {
+            log_os << __FUNCTION__ << ": deleted index: " << index << "\n";
+        }
+#endif
+
+        {
+            svCandDeleter svDeleter(svs,moveSVIndex);
+
+            for (const unsigned index : deletedSVIndex)
+            {
+                svDeleter.deleteIndex(index);
+            }
+            svDeleter.deleteIndex(svCount);
+        }
+
+        svs.resize(svs.size()-deletedSVIndex.size());
+
+        // fix indices:
+        for (unsigned i(0); i<svs.size(); ++i)
+        {
+            svs[i].candidateIndex = i;
+        }
+    }
+
+    if (! moveSVIndex.empty())
+    {
+#ifdef DEBUG_SVDATA
+        for (const movemap_t::value_type& val : moveSVIndex)
+        {
+            log_os << __FUNCTION__ << ": Movemap from: " << val.first << " to: " << val.second << "\n";
+        }
+#endif
+
+        for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+        {
+            SVCandidateSetSequenceFragmentSampleGroup& svDataGroup(svData.getDataGroup(bamIndex));
+            for (SVCandidateSetSequenceFragment& fragment : svDataGroup)
+            {
+                for (SVSequenceFragmentAssociation& sva : fragment.svLink)
+                {
+                    if (moveSVIndex.count(sva.index))
+                    {
+                        sva.index = moveSVIndex[sva.index];
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+
+/// store additional signal rate information to help decide if the candidate evidence
+/// is significant relative to background noise in the sample.
+///
+/// TODO -- is the following comment out of date?: only handles complex cases for now (assumes sv is complex)
+static
+void
+updateEvidenceIndex(
+    const SVCandidateSetSequenceFragment& fragment,
+    const SVObservation& obs,
+    FatSVCandidate& sv)
+{
+    if (obs.isSingleReadSource())
+    {
+        const SVCandidateSetRead& candRead(obs.isRead1Source() ? fragment.read1 : fragment.read2);
+        if (obs.evtype != SVEvidenceType::SPLIT_ALIGN)
+        {
+            if ((not candRead.bamrec.empty()) && (not candRead.isSubMapped) )
+            {
+                sv.bp1EvidenceIndex[obs.evtype].push_back(candRead.readIndex);
+#ifdef DEBUG_SVDATA
+                log_os << __FUNCTION__ << ": non_split: Added readIndex " << candRead.readIndex
+                       << " to evtype " << obs.evtype << "\n";
+#endif
+
+            }
+        }
+        else
+        {
+            const bool is1to1(sv.isIntersect1to1(obs));
+            auto& readBp(is1to1 ? sv.bp1EvidenceIndex : sv.bp2EvidenceIndex);
+            auto& readSuppBp(is1to1 ? sv.bp2EvidenceIndex : sv.bp1EvidenceIndex);
+            const auto& read(obs.isRead1Source() ? fragment.read1 : fragment.read2);
+            const auto& readSupp(obs.isRead1Source() ? fragment.read1Supplemental : fragment.read2Supplemental);
+
+            if ((not read.bamrec.empty()) && (not read.isSubMapped))
+            {
+                readBp[obs.evtype].push_back(read.readIndex);
+#ifdef DEBUG_SVDATA
+                log_os << __FUNCTION__ << ": split_mapped: Added readIndex " << candRead.readIndex
+                       << " to evtype " << obs.evtype << "\n";
+#endif
+            }
+            if (readSupp.size() == 1)
+            {
+                if ((not readSupp.front().bamrec.empty()) && (not readSupp.front().isSubMapped))
+                {
+                    readSuppBp[obs.evtype].push_back(readSupp.front().readIndex);
+#ifdef DEBUG_SVDATA
+                    log_os << __FUNCTION__ << ": split_supp_mapped: Added readIndex " << readSupp.front().readIndex
+                           << " to evtype " << obs.evtype << "\n";
+#endif
+                }
+            }
+        }
+    }
+    else
+    {
+        // account for bp1 and bp2 mapping to read1 and read2:
+        const bool is1to1(sv.isIntersect1to1(obs));
+        const SVCandidateSetRead& bp1Read(is1to1 ? fragment.read1 : fragment.read2);
+        const SVCandidateSetRead& bp2Read(is1to1 ? fragment.read2 : fragment.read1);
+        if ((not bp1Read.bamrec.empty()) && (not bp1Read.isSubMapped))
+        {
+            sv.bp1EvidenceIndex[obs.evtype].push_back(bp1Read.readIndex);
+#ifdef DEBUG_SVDATA
+            log_os << __FUNCTION__ << ": multi_read_source: Added readIndex " << bp1Read.readIndex
+                   << " to evtype " << obs.evtype << "\n";
+#endif
+        }
+        if ((not bp2Read.bamrec.empty()) && (not bp2Read.isSubMapped))
+        {
+            sv.bp2EvidenceIndex[obs.evtype].push_back(bp2Read.readIndex);
+#ifdef DEBUG_SVDATA
+            log_os << __FUNCTION__ << ": multi_read_source: Added readIndex " << bp2Read.readIndex
+                   << " to evtype " << obs.evtype << "\n";
+#endif
+        }
+    }
+}
+
+
+
+/// readCandidates are the set of hypotheses generated by individual read pair --
+/// this is the read pair which we seek to assign to one of the identified SVs (in svs)
+/// or we push the candidate into svs to start a new candidate associated with this edge
+///
+/// this is meant as only a temporary form of hypothesis generation, in the current system
+/// we do at least delineate alternative candidates by strand and region overlap, but over
+/// the longer term we should be able to delineate cluster by a clustering of possible
+/// breakend locations.
+///
+/// \param isExpandSVCandidateSet if false, don't add new SVs or expand existing SVs
+///
+void
+SVFinder::
+assignFragmentObservationsToSVCandidates(
+    const SVLocusNode& node1,
+    const SVLocusNode& node2,
+    const std::vector<SVObservation>& readCandidates,
+    const bool isExpandSVCandidateSet,
+    SVCandidateSetSequenceFragment& fragment,
+    std::vector<FatSVCandidate>& svs)
+{
+    // we anticipate so few svs from the POC method, that there's no indexing on them
+    for (const SVObservation& readCand : readCandidates)
+    {
+#ifdef DEBUG_SVDATA
+        log_os << __FUNCTION__ << ": Starting assignment for read cand: " << readCand << "\n";
+#endif
+        if (_isRNA)
+        {
+            int minLength = isCis(readCand) ? _scanOpt.minRNACisLength : _scanOpt.minRNALength;
+            if (isSVBelowMinSize(readCand, minLength))
+            {
+#ifdef DEBUG_SVDATA
+                log_os << __FUNCTION__ << ": Filtered short RNA Candidate (< " << minLength << ")\n";
+#endif
+                continue;
+            }
+        }
+
+        // remove candidates which don't match the current edge:
+        //
+        const bool isComplexCand(isComplexSV(readCand));
+        if (isComplexCand)
+        {
+            if (! readCand.bp1.interval.isIntersect(node1.getInterval())) continue;
+            if (! readCand.bp1.interval.isIntersect(node2.getInterval())) continue;
+        }
+        else
+        {
+            const bool isIntersect((readCand.bp1.interval.isIntersect(node1.getInterval())) &&
+                                   (readCand.bp2.interval.isIntersect(node2.getInterval())));
+            const bool isSwapIntersect((readCand.bp1.interval.isIntersect(node2.getInterval())) &&
+                                       (readCand.bp2.interval.isIntersect(node1.getInterval())));
+            if (! (isIntersect || isSwapIntersect)) continue;
+        }
+
+        // spanning means there's a left|right and left|right breakend pair (in any order) -- note this is not the
+        // same as asking if the evidence comes from a read pair. For instance, a CIGAR string can
+        // provide a spanning, non-read-pair candidate
+        const bool isSpanningCand(isSpanningSV(readCand));
+
+        bool isMatched(false);
+        unsigned svIndex(0);
+        for (FatSVCandidate& sv : svs)
+        {
+            if (sv.isIntersect(readCand))
+            {
+#if 0
+                /// keep candidates formed by semi-mapped reads in separate groups,
+                /// these will only be used to augment the evidence of a candidate created with
+                /// regular pair evidence -- all purely local candidates will be thrown away.
+                ///
+                /// the separation starts early (ie. here) because we might not want to use the local-pair
+                /// regions... this will take some trial and error
+                ///
+                const bool isCandLocalOnly(readCand.evtype == SVEvidenceType::LOCAL_PAIR);
+                const bool isSVLocalOnly(sv.bp1.isLocalPairOnly() && sv.bp2.isLocalPairOnly());
+
+                if (isCandLocalOnly == isSVLocalOnly)
+#endif
+                {
+                    if (isSpanningCand)
+                    {
+                        // don't store fragment association unless there's a specific hypothesis --
+                        // if there is no hypothesis (small assembly cases (thus "! isSpanning")), we'll be
+                        // going back through the bam region during assembly anyway:
+                        //
+                        fragment.svLink.emplace_back(svIndex,readCand.evtype);
+                    }
+
+                    updateEvidenceIndex(fragment,readCand,sv);
+
+                    // check evidence distance:
+                    sv.merge(FatSVCandidate(readCand), isExpandSVCandidateSet);
+
+#ifdef DEBUG_SVDATA
+                    log_os << __FUNCTION__ << ": Added to svIndex: " << svIndex << " match_sv: " << sv << "\n";
+#endif
+
+                    isMatched=true;
+                    break;
+                }
+            }
+            svIndex++;
+        }
+
+        const bool createNewCandidate(isExpandSVCandidateSet && (! isMatched));
+        if (createNewCandidate)
+        {
+            const unsigned newSVIndex(svs.size());
+
+#ifdef DEBUG_SVDATA
+            log_os << __FUNCTION__ << ": New svIndex: " << newSVIndex << "\n";
+#endif
+
+            svs.push_back(FatSVCandidate(readCand));
+            svs.back().candidateIndex = newSVIndex;
+
+            if (isSpanningCand)
+            {
+                // ditto note above, store fragment association only when there's an SV hypothesis:
+                fragment.svLink.emplace_back(newSVIndex,readCand.evtype);
+            }
+            updateEvidenceIndex(fragment,readCand,svs.back());
+        }
+    }
+}
+
+
+
+void
+SVFinder::
+processSequenceFragment(
+    const SVLocusNode& node1,
+    const SVLocusNode& node2,
+    const bam_header_info& bamHeader,
+    const reference_contig_segment& refSeq1,
+    const reference_contig_segment& refSeq2,
+    const unsigned bamIndex,
+    const bool isExpandSVCandidateSet,
+    std::vector<FatSVCandidate>& svs,
+    SVCandidateSetSequenceFragment& fragment,
+    SVFinderStats& stats)
+{
+    SVCandidateSetRead* localReadPtr(&(fragment.read1));
+    SVCandidateSetRead* remoteReadPtr(&(fragment.read2));
+    fragment.svLink.clear();
+
+    if (! localReadPtr->isSet())
+    {
+        std::swap(localReadPtr,remoteReadPtr);
+    }
+
+    if (! localReadPtr->isSet())
+    {
+        // this could occur when a supplemental read only is found:
+        return;
+        //assert(localReadPtr->isSet() && "Neither read in pair is set");
+    }
+
+    // sanity check of read pairs
+    if (! fragment.checkReadPair())
+    {
+        stats.unmatchedReadPairFilter++;
+        return;
+    }
+
+    const bam_record* remoteBamRecPtr( remoteReadPtr->isSet() ? &(remoteReadPtr->bamrec) : nullptr);
+
+    const reference_contig_segment& localRef( localReadPtr->isNode1 ? refSeq1 : refSeq2 );
+    const reference_contig_segment* remoteRefPtr(nullptr);
+    if (remoteReadPtr->isSet())
+    {
+        remoteRefPtr = (remoteReadPtr->isNode1 ?  &refSeq1 : &refSeq2 );
+    }
+    _readScanner.getBreakendPair(localReadPtr->bamrec, remoteBamRecPtr,
+                                 bamIndex, bamHeader, localRef,
+                                 remoteRefPtr, _readCandidates);
+
+    // collapse close spanning sv candidates into complex candidates -- this reflects the fact that the
+    // assembler will collapse them anyway, so reduces duplicated work in the assembler;
+    for (SVObservation& cand : _readCandidates)
+    {
+        if (getSVType(cand) != SV_TYPE::INDEL) continue;
+        known_pos_range2 r1(cand.bp1.interval.range);
+        known_pos_range2 r2(cand.bp2.interval.range);
+        static const pos_t window(30);
+        r1.expandBy(window);
+        r2.expandBy(window);
+        if (! r1.is_range_intersect(r2)) continue;
+
+        // collapse this case:
+        cand.bp1.state = SVBreakendState::COMPLEX;
+        cand.bp2.state = SVBreakendState::UNKNOWN;
+        cand.bp1.interval.range.merge_range(cand.bp2.interval.range);
+    }
+
+    // hack split read observations to be symmetrically supported, even though we're only
+    // reading in one side:
+    for (SVObservation& readCand : _readCandidates)
+    {
+        using namespace SVEvidenceType;
+        if ( readCand.evtype != SPLIT_ALIGN ) continue;
+
+        if (readCand.bp1.lowresEvidence.getVal(SPLIT_ALIGN) == 0) readCand.bp1.lowresEvidence.add(SPLIT_ALIGN);
+        if (readCand.bp2.lowresEvidence.getVal(SPLIT_ALIGN) == 0) readCand.bp2.lowresEvidence.add(SPLIT_ALIGN);
+    }
+
+#ifdef DEBUG_SVDATA
+    log_os << __FUNCTION__ << ": Checking pair: " << fragment << "\n";
+    log_os << __FUNCTION__ << ": Translated to candidates:\n";
+    for (const SVObservation& cand : _readCandidates)
+    {
+        log_os << __FUNCTION__ << ": cand: " << cand << "\n";
+    }
+#endif
+    assignFragmentObservationsToSVCandidates(node1, node2, _readCandidates, isExpandSVCandidateSet, fragment, svs);
+}
+
+
+
+#if 0
+static
+bool
+isLocalEvidence(
+    const SVEvidenceType::index_t idx)
+{
+    using namespace SVEvidenceType;
+
+    switch (idx)
+    {
+    case CIGAR:
+    case SOFTCLIP:
+    case SEMIALIGN:
+        return true;
+    default:
+        return false;
+    }
+}
+#endif
+
+
+
+static
+bool
+isCandidateCountSufficient(
+    const SVCandidate& sv)
+{
+    static const unsigned minCandidateComplexCount(2);
+    const SVBreakendLowResEvidence& evidence(sv.bp1.lowresEvidence);
+    for (unsigned i(0); i<SVEvidenceType::SIZE; ++i)
+    {
+        if (SVEvidenceType::isPairType(i)) continue;
+        if (evidence.getVal(i) >= minCandidateComplexCount) return true;
+    }
+    return false;
+}
+
+
+/// determine if the rate of supporting read observations at a breakpoint is significant
+/// relative to a background noise rate
+///
+/// \param signalReadInfo vector which has a size equal to the supporting read count, for each supporting read, the vector
+///                    contains a relative index for the supporting read among all qualifying (mapped) reads from
+///                    the same input BAM file. This relative index is used to estimate signal density.
+/// \return true if we reject the null hyp that breakpoint signal is noise
+static
+bool
+isBreakPointSignificant(
+    const double alpha,
+    const double noiseRate,
+    std::vector<double>& signalReadInfo)
+{
+    const unsigned signalReadCount(signalReadInfo.size());
+
+    // enforce a simple minimum signal count regardless of noiseRate/alpha
+    if (signalReadCount < 2) return false;
+
+    assert(signalReadCount >= 1);
+
+    // sort the indices so that by taking the difference we can get an estimate of the
+    // number of background reads that occurred 'between' the signal observations:
+    std::sort(signalReadInfo.begin(),signalReadInfo.end());
+#ifdef DEBUG_SVDATA
+    log_os << __FUNCTION__ << ": signalReadInfo_size=" << signalReadInfo.size()
+           << " signalReadInfo={";
+    for (const auto rd : signalReadInfo)
+    {
+        log_os << rd << ",";
+    }
+    log_os << "}\n";
+#endif
+
+
+    // The density of signal reads will be variable, so focus on peak density. To do this we pick out a small
+    // continuous set of signal observations. The count of observation intervals in this window is
+    // 'signalWindowSize' equal to maxSignalWindowSize (or less if the total number of observation
+    // intervals is smaller)
+    //
+    static const unsigned maxSignalWindowSize(4);
+    unsigned signalWindowSize(std::min(maxSignalWindowSize, (signalReadCount-1)));
+
+    // move a sliding window of size 'signalWindowSize' through the sorted read index list to find the minimum
+    // estimated background read count among a continuous signalWindowSize+1 series of signal observations.
+    //
+    // this greedy minimum approach will tend to err on the side of over-estimating signal rate and calling noisy
+    // evidence significant, which is the behavior we want for this kind of conservative noise filter.
+    double minWindowBackgroundCount(0);
+    for (unsigned signalReadIndex(0); signalReadIndex<(signalReadCount-signalWindowSize); ++signalReadIndex)
+    {
+        const double windowBackgroundCount(signalReadInfo[signalReadIndex+signalWindowSize] - signalReadInfo[signalReadIndex]);
+        if ((signalReadIndex==0) or (minWindowBackgroundCount > windowBackgroundCount))
+        {
+            minWindowBackgroundCount = windowBackgroundCount;
+        }
+    }
+    if (signalWindowSize > minWindowBackgroundCount) signalWindowSize = unsigned(minWindowBackgroundCount);
+
+#ifdef DEBUG_SVDATA
+    {
+        log_os << __FUNCTION__
+               << ": noiseRate=" << noiseRate
+               << " signalWindowSize=" << signalWindowSize
+               << " backgroundCount=" << minWindowBackgroundCount
+               << " isReject=" << is_reject_binomial_gte_n_success_exact(alpha, noiseRate, signalWindowSize, static_cast<unsigned>(minWindowBackgroundCount))
+               << "\n";
+    }
+#endif
+
+    return is_reject_binomial_gte_n_success_exact(alpha, noiseRate, signalWindowSize, static_cast<unsigned>(minWindowBackgroundCount));
+}
+
+
+
+/// test a spanning candidate for minimum supporting evidence level prior
+/// to assembly and scoring stages
+///
+/// Note this test is applied early, and as such it is intended to only filter
+/// out cases which are very likely to be noise. This method has an important
+/// role in controlling the analysis runtime for FFPE tumor samples, where we
+/// might otherwise initiate a very large number of assembly processes for unlikely
+/// variant candidates.
+static
+bool
+isSpanningCandidateSignalSignificant(
+    const double noiseRate,
+    const FatSVCandidate& sv)
+{
+    std::vector<double> evidence_bp1;
+    std::vector<double> evidence_bp2;
+    for (unsigned evidenceTypeIndex(0); evidenceTypeIndex<SVEvidenceType::SIZE; ++evidenceTypeIndex)
+    {
+        appendVec(evidence_bp1,sv.bp1EvidenceIndex[evidenceTypeIndex]);
+        appendVec(evidence_bp2,sv.bp2EvidenceIndex[evidenceTypeIndex]);
+    }
+
+    static const double alpha(0.05);
+    const bool isBp1(isBreakPointSignificant(alpha, noiseRate, evidence_bp1));
+    const bool isBp2(isBreakPointSignificant(alpha, noiseRate, evidence_bp2));
+
+    return (isBp1 || isBp2);
+}
+
+
+
+static
+bool
+isComplexCandidateSignalSignificant(
+    const double noiseRate,
+    const FatSVCandidate& sv)
+{
+    std::vector<double> evidence;
+    for (unsigned i(0); i<SVEvidenceType::SIZE; ++i)
+    {
+        //if (! isLocalEvidence(i)) continue;
+        appendVec(evidence,sv.bp1EvidenceIndex[i]);
+    }
+    static const double alpha(0.005);
+    return (isBreakPointSignificant(alpha, noiseRate,evidence));
+}
+
+
+
+namespace SINGLE_FILTER
+{
+enum index_t
+{
+    NONE,
+    SEMIMAPPED,
+    COMPLEXLOWCOUNT,
+    COMPLEXLOWSIGNAL,
+    SPANNINGLOWSIGNAL
+};
+}
+
+
+
+/// return the single-junction filter category which applies to the candidate
+/// (NONE if no filter applies), based on information available in a single
+/// junction (as opposed to requiring multi-junction analysis)
+///
+static
+SINGLE_FILTER::index_t
+isFilterSingleJunctionCandidate(
+    const bool isRNA,
+    const double spanningNoiseRate,
+    const double assemblyNoiseRate,
+    const FatSVCandidate& sv)
+{
+    using namespace SINGLE_FILTER;
+
+    // don't consider candidates created from only
+    // semi-mapped read pairs (ie. one read of the pair is MAPQ0 or MAPQsmall)
+    if (sv.bp1.isLocalOnly() && sv.bp2.isLocalOnly()) return SEMIMAPPED;
+
+    // candidates must have a minimum amount of evidence:
+    if (isSpanningSV(sv))
+    {
+        /// TODO make sensitivity adjustments for RNA here:
+        if (! isRNA)
+        {
+            if (! isSpanningCandidateSignalSignificant(spanningNoiseRate, sv)) return SPANNINGLOWSIGNAL;
+        }
+    }
+    else if (isComplexSV(sv))
+    {
+        if (! isCandidateCountSufficient(sv)) return COMPLEXLOWCOUNT;
+        if (! isComplexCandidateSignalSignificant(assemblyNoiseRate, sv)) return COMPLEXLOWSIGNAL;
+    }
+    else
+    {
+        assert(false && "Unknown SV candidate type");
+    }
+
+    return NONE;
+}
+
+
+
+static
+void
+filterCandidates(
+    const bool isRNA,
+    const double spanningNoiseRate,
+    const double assemblyNoiseRate,
+    std::vector<FatSVCandidate>& svs,
+    SVFinderStats& stats)
+{
+    unsigned svCount(svs.size());
+    unsigned index(0);
+    while (index<svCount)
+    {
+        using namespace SINGLE_FILTER;
+        const index_t filt(isFilterSingleJunctionCandidate(isRNA, spanningNoiseRate,assemblyNoiseRate,svs[index]));
+
+        bool isFilter(false);
+        switch (filt)
+        {
+        case SEMIMAPPED:
+            stats.semiMappedFilter++;
+            isFilter = true;
+            break;
+        case COMPLEXLOWCOUNT:
+            stats.ComplexLowCountFilter++;
+            isFilter = true;
+            break;
+        case COMPLEXLOWSIGNAL:
+            stats.ComplexLowSignalFilter++;
+            isFilter = true;
+            break;
+        case SPANNINGLOWSIGNAL:
+            svs[index].isSingleJunctionFilter = true;
+            break;
+        default:
+            break;
+        }
+
+        if (isFilter)
+        {
+            if ((index+1) < svCount) svs[index] = svs.back();
+            svs.resize(--svCount);
+        }
+        else
+        {
+            index++;
+        }
+    }
+}
+
+
+
+void
+SVFinder::
+getCandidatesFromData(
+    const SVLocusNode& node1,
+    const SVLocusNode& node2,
+    const bam_header_info& bamHeader,
+    const reference_contig_segment& refSeq1,
+    const reference_contig_segment& refSeq2,
+    SVCandidateSetData& svData,
+    std::vector<SVCandidate>& output_svs,
+    SVFinderStats& stats)
+{
+    const unsigned bamCount(_bamStreams.size());
+
+    // track a richer candidates data structure internally, then slice the info down to the
+    // regular sv candidate as a last step:
+    std::vector<FatSVCandidate> svs;
+
+    for (unsigned bamIndex(0); bamIndex<bamCount; ++bamIndex)
+    {
+        SVCandidateSetSequenceFragmentSampleGroup& svDataGroup(svData.getDataGroup(bamIndex));
+        for (SVCandidateSetSequenceFragment& fragment : svDataGroup)
+        {
+            /// TODO update this test to generalize from read pair to split reads:
+            if (! fragment.isAnchored()) continue;
+
+            static const bool isAnchored(true);
+            processSequenceFragment(
+                node1, node2, bamHeader, refSeq1, refSeq2, bamIndex, isAnchored,
+                svs, fragment, stats);
+        }
+    }
+
+    if (_isSomatic)
+    {
+        for (unsigned bamIndex(0); bamIndex<bamCount; ++bamIndex)
+        {
+            // for somatic calling we're only interested in submapped read processing for the normal sample:
+            const bool isTumor(_isAlignmentTumor[bamIndex]);
+            if (isTumor) continue;
+
+            SVCandidateSetSequenceFragmentSampleGroup& svDataGroup(svData.getDataGroup(bamIndex));
+            for (SVCandidateSetSequenceFragment& pair : svDataGroup)
+            {
+                if (pair.isAnchored()) continue;
+
+                static const bool isAnchored(false);
+                processSequenceFragment(
+                    node1, node2, bamHeader, refSeq1, refSeq2, bamIndex, isAnchored,
+                    svs, pair, stats);
+            }
+        }
+    }
+
+#ifdef DEBUG_SVDATA
+    {
+        log_os << __FUNCTION__ << ": precount: " << svs.size() << "\n";
+
+        unsigned svIndex(0);
+        for (SVCandidate& sv : svs)
+        {
+            log_os << __FUNCTION__ << ": PRECOUNT: index: " << svIndex << " " << sv;
+            svIndex++;
+        }
+    }
+#endif
+
+    consolidateOverlap(bamCount,svData,svs);
+
+#ifdef DEBUG_SVDATA
+    {
+        log_os << __FUNCTION__ << ": postcount: " << svs.size() << "\n";
+
+        unsigned svIndex(0);
+        for (SVCandidate& sv : svs)
+        {
+            log_os << __FUNCTION__ << ": POSTCOUNT: index: " << svIndex << " " << sv;
+            svIndex++;
+        }
+    }
+#endif
+
+    filterCandidates(_isRNA, _spanningNoiseRate, _assemblyNoiseRate,svs,stats);
+
+    std::copy(svs.begin(),svs.end(),std::back_inserter(output_svs));
+}
+
+
+
+void
+SVFinder::
+findCandidateSVImpl(
+    const EdgeInfo& edge,
+    SVCandidateSetData& svData,
+    std::vector<SVCandidate>& svs,
+    SVFinderStats& stats)
+{
+    svData.clear();
+    svs.clear();
+
+#ifdef DEBUG_SVDATA
+    log_os << "SVDATA: Evaluating edge: " << edge << "\n";
+#endif
+
+    const SVLocusSet& cset(getSet());
+
+    // first determine if this is an edge we're going to evaluate
+    //
+    // edge must be bidirectional at the noise threshold of the locus set:
+    if (! isBidirectionalEdge(cset, edge))
+    {
+#ifdef DEBUG_SVDATA
+        log_os << "SVDATA: Edge failed min edge count.\n";
+#endif
+        stats.edgeFilter++;
+        return;
+    }
+
+    //
+    // 1) scan through each region to identify all reads supporting
+    // some sort of breakend in the target region, then match up read
+    // pairs so that they can easily be accessed from each other
+    //
+    // 2) iterate through breakend read pairs to estimate the number, type
+    // and likely breakend interval regions of SVs corresponding to this edge
+    //
+    const bam_header_info& bamHeader(cset.header);
+
+    const SVLocus& locus(cset.getLocus(edge.locusIndex));
+
+    reference_contig_segment refSeq1;
+    reference_contig_segment refSeq2;
+    {
+        GenomeInterval searchInterval;
+        getNodeRefSeq(bamHeader, locus, edge.nodeIndex1, _referenceFilename, searchInterval, refSeq1);
+        addSVNodeData(bamHeader, locus, edge.nodeIndex1, edge.nodeIndex2,
+                      searchInterval, refSeq1, true, svData);
+    }
+
+    if (edge.nodeIndex1 != edge.nodeIndex2)
+    {
+        GenomeInterval searchInterval;
+        getNodeRefSeq(bamHeader, locus, edge.nodeIndex2, _referenceFilename, searchInterval, refSeq2);
+        addSVNodeData(bamHeader, locus, edge.nodeIndex2, edge.nodeIndex1,
+                      searchInterval, refSeq2, false, svData);
+    }
+
+    const SVLocusNode& node1(locus.getNode(edge.nodeIndex1));
+    const SVLocusNode& node2(locus.getNode(edge.nodeIndex2));
+    getCandidatesFromData(node1, node2, bamHeader, refSeq1, refSeq2,
+                          svData, svs, stats);
+
+    //checkResult(svData,svs);
+}
+
+
+
+void
+SVFinder::
+findCandidateSV(
+    const EdgeInfo& edge,
+    SVCandidateSetData& svData,
+    std::vector<SVCandidate>& svs)
+{
+    // time/stats tracking setup:
+    const TimeScoper candTime(_edgeTracker.candTime);
+    SVFinderStats stats;
+
+    findCandidateSVImpl(edge,svData,svs,stats);
+
+    // time/stats tracking finish:
+    _edgeStatMan.updateEdgeCandidates(edge, svs.size(), stats);
+
+    if (_isVerbose)
+    {
+        log_os << __FUNCTION__ << ": Low-resolution candidate generation complete. Candidate count: " << svs.size() << "\n";
+    }
+
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVFinder.hh b/src/c++/lib/applications/GenerateSVCandidates/SVFinder.hh
new file mode 100644
index 0000000..44b3f03
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVFinder.hh
@@ -0,0 +1,157 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "EdgeRuntimeTracker.hh"
+#include "FatSVCandidate.hh"
+#include "GSCOptions.hh"
+#include "GSCEdgeStatsManager.hh"
+#include "appstats/SVFinderStats.hh"
+#include "htsapi/bam_streamer.hh"
+#include "manta/ChromDepthFilterUtil.hh"
+#include "manta/SVCandidateSetData.hh"
+#include "manta/SVLocusScanner.hh"
+#include "svgraph/EdgeInfo.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include <vector>
+
+
+struct SVFinder // generates low-resolution SV candidates for a single SV locus graph edge at a time
+{
+    SVFinder(
+        const GSCOptions& opt,
+        const SVLocusScanner& readScanner,
+        EdgeRuntimeTracker& edgeTracker,
+        GSCEdgeStatsManager& edgeStatMan);
+
+    ~SVFinder();
+    /// \return read-only access to the SVLocusSet held by this object
+    const SVLocusSet&
+    getSet() const
+    {
+        return _set;
+    }
+    /// find SV candidates for \p edge; results are written to \p svData and \p svs, timing and stats go to the trackers given at construction
+    void
+    findCandidateSV(
+        const EdgeInfo& edge,
+        SVCandidateSetData& svData,
+        std::vector<SVCandidate>& svs);
+
+    void
+    checkResult(
+        const SVCandidateSetData& svData,
+        const std::vector<SVCandidate>& svs) const;
+
+private:
+
+    void
+    addSVNodeData(
+        const bam_header_info& bamHeader,
+        const SVLocus& locus,
+        const NodeIndexType node1,
+        const NodeIndexType node2,
+        const GenomeInterval& searchInterval,
+        const reference_contig_segment& refSeq,
+        const bool isNode1,
+        SVCandidateSetData& svData);
+
+    void
+    assignFragmentObservationsToSVCandidates(
+        const SVLocusNode& node1,
+        const SVLocusNode& node2,
+        const std::vector<SVObservation>& readCandidates,
+        const bool isExpandSVCandidateSet,
+        SVCandidateSetSequenceFragment& fragment,
+        std::vector<FatSVCandidate>& svs);
+
+    /// we either process the fragment to discover new SVs and expand existing SVs,
+    /// or we go through and add pairs to existing SVs without expansion
+    /// (NOTE(review): the mode is presumably selected by isExpandSVCandidateSet -- confirm against the definition)
+    void
+    processSequenceFragment(
+        const SVLocusNode& node1,
+        const SVLocusNode& node2,
+        const bam_header_info& bamHeader,
+        const reference_contig_segment& refSeq1,
+        const reference_contig_segment& refSeq2,
+        const unsigned bamIndex,
+        const bool isExpandSVCandidateSet,
+        std::vector<FatSVCandidate>& svs,
+        SVCandidateSetSequenceFragment& fragment,
+        SVFinderStats& stats);
+
+    void
+    getCandidatesFromData(
+        const SVLocusNode& node1,
+        const SVLocusNode& node2,
+        const bam_header_info& bamHeader,
+        const reference_contig_segment& refSeq1,
+        const reference_contig_segment& refSeq2,
+        SVCandidateSetData& svData,
+        std::vector<SVCandidate>& svs,
+        SVFinderStats& stats);
+    /// implementation of findCandidateSV without the timing/stats bookkeeping; search stats are accumulated into \p stats
+    void
+    findCandidateSVImpl(
+        const EdgeInfo& edge,
+        SVCandidateSetData& svData,
+        std::vector<SVCandidate>& svs,
+        SVFinderStats& stats);
+    /// dereference the depth filter pointer; note that no null check is made here
+    const ChromDepthFilterUtil&
+    dFilter() const
+    {
+        return *(_dFilterPtr);
+    }
+
+    const ReadScannerOptions _scanOpt;
+    const std::vector<bool> _isAlignmentTumor;
+    SVLocusSet _set;
+    std::unique_ptr<ChromDepthFilterUtil> _dFilterPtr;
+    const SVLocusScanner& _readScanner;
+
+    const std::string _referenceFilename;
+
+    const bool _isRNA;
+    const bool _isVerbose;
+    bool _isSomatic;
+
+    typedef std::shared_ptr<bam_streamer> streamPtr;
+    std::vector<streamPtr> _bamStreams;
+
+    /// this is only here as syscall cache (presumably: reused between calls to avoid repeated allocation -- TODO confirm):
+    std::vector<SVObservation> _readCandidates;
+
+    /// throwaway stats tracker...
+    SampleEvidenceCounts _eCounts;
+
+    double _spanningNoiseRate;
+    double _assemblyNoiseRate;
+    // trackers supplied by the caller at construction and held by reference:
+    EdgeRuntimeTracker& _edgeTracker;
+    GSCEdgeStatsManager& _edgeStatMan;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorePairAltProcessor.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairAltProcessor.cpp
new file mode 100644
index 0000000..0dec5fd
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairAltProcessor.cpp
@@ -0,0 +1,596 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#include "SVScorePairAltProcessor.hh"
+#include "blt_util/seq_util.hh"
+#include "blt_util/SimpleAlignment.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/bam_record_util.hh"
+#include "manta/SVCandidateUtil.hh"
+
+#include <cassert>
+
+#include <sstream>
+
+
+/// standard debug output for this file:
+//#define DEBUG_PAIR
+
+/// ridiculous debug output for this file:
+//#define DEBUG_MEGAPAIR
+
+//#define DEBUG_SHADOW
+
+//#define DEBUG_SUPPORT
+
+#if defined(DEBUG_PAIR) || defined(DEBUG_MEGAPAIR) || defined(DEBUG_SHADOW) || defined(DEBUG_SUPPORT)
+#define ANY_DEBUG_PAIR
+#endif
+
+#ifdef ANY_DEBUG_PAIR
+#include "blt_util/log.hh"
+#endif
+
+
+
+ContigParams::
+ContigParams(
+    const SVCandidateAssemblyData& assemblyData,
+    const SVCandidate& sv) :
+    extSeq(assemblyData.extendedContigs[sv.assemblyAlignIndex])
+{
+    // this class is designed for simple alts only (same-contig, INDEL-type, precise svs; checked by assert only):
+    assert(sv.bp1.interval.tid == sv.bp2.interval.tid);
+    assert(getSVType(sv) == SV_TYPE::INDEL);
+    assert(! sv.isImprecise());
+    // order the breakends so that bpA is the one with the lower (or equal) begin position:
+    const bool isBp1First(sv.bp1.interval.range.begin_pos()<=sv.bp2.interval.range.begin_pos());
+
+    const SVBreakend& bpA(isBp1First ? sv.bp1 : sv.bp2);
+    const SVBreakend& bpB(isBp1First ? sv.bp2 : sv.bp1);
+    // the homology length of each breakend is taken to be its interval size minus one:
+    const pos_t bpAHomLength(static_cast<pos_t>(bpA.interval.range.size())-1);
+    const pos_t bpBHomLength(static_cast<pos_t>(bpB.interval.range.size())-1);
+    assert(bpAHomLength >= 0);
+    assert(bpBHomLength >= 0);
+
+    const bool isSpanning(assemblyData.isSpanning);
+
+    // the begin_pos off by one here is inherited from a more complex previous computation, the code
+    // which uses this value has already been debugged and is functioning, so no reason to fix it, but
+    // note this range doesn't follow the manta convention (probably by accident)
+    segmentSpan.set_range(bpA.interval.range.begin_pos()+1,bpB.interval.range.begin_pos());
+
+    // the beginPos of align is the length of reference padding in the extended contig
+    // |ref padding| + |alignment segments|
+    // both bp1 and bp2 include the insert and homology,
+    // which can avoid false split-read evidence from normal sample when the homology is long
+
+    // all offset range 'begin' values correspond to the zero-indexed base immediately before the breakend on the fwd-strand,
+    // and 'end' values correspond to the zero-indexed base immediately before the breakend on the forward strand+homology range
+    // In the absence of homology, begin and end should be equal.
+
+    // note that we add align.beginPos here to reflect coordinates in the extended Contig, in the regular contig we wouldn't add this
+    pos_t alignBeginPos(0);
+    pos_t readStartPos(0);
+    if (isSpanning)
+    {
+        const SVCandidateAssemblyData::JumpAlignmentResultType& alignment(assemblyData.spanningAlignments[sv.assemblyAlignIndex]);
+        alignBeginPos = alignment.align1.beginPos;
+        readStartPos = apath_read_length(alignment.align1.apath); // read length of the first alignment of the jump
+    }
+    else
+    {
+        const AlignmentResult<int>& alignment(assemblyData.smallSVAlignments[sv.assemblyAlignIndex]);
+        const std::pair<unsigned, unsigned>& alignSegment(assemblyData.smallSVSegments[sv.assemblyAlignIndex][sv.assemblySegmentIndex]);
+        ALIGNPATH::path_t apathTillSvStart(&alignment.align.apath[0], &alignment.align.apath[alignSegment.first]);
+        // only the alignment path segments before alignSegment.first contribute to the read offset:
+        alignBeginPos = alignment.align.beginPos;
+        readStartPos = apath_read_length(apathTillSvStart);
+    }
+    bpAOffset.set_begin_pos(alignBeginPos + readStartPos - 1);
+    bpAOffset.set_end_pos(bpAOffset.begin_pos() + bpAHomLength);
+    bpBOffset.set_begin_pos(bpAOffset.begin_pos() + sv.insertSeq.size()); // bpB lies one insert sequence length after bpA on the contig
+    bpBOffset.set_end_pos(bpBOffset.begin_pos() + bpBHomLength);
+
+#ifdef DEBUG_SHADOW
+    log_os << __FUNCTION__ << ": contigSize: " << extSeq.size()
+           << " segmentSpan: " << segmentSpan
+           << " bpAOffset: " << bpAOffset
+           << " bpBOffset: " << bpBOffset
+           << "\n";
+#endif
+}
+
+
+
+void
+SVScorePairAltProcessor::
+checkInput(
+    const SVCandidate& sv)
+{
+    using namespace illumina::common;
+
+    // this class is designed for simple alts only: both breakends on the same contig and an INDEL-type sv (asserts only, so not enforced when NDEBUG is defined)
+    assert(sv.bp1.interval.tid == sv.bp2.interval.tid);
+    assert(getSVType(sv) == SV_TYPE::INDEL);
+}
+
+
+
+/// test whether a frag reference span provides sufficient support for a breakpoint of this sv: the span must reach at least pairOpt.minFragSupport bases to the left of centerPosA and to the right of centerPosB
+bool
+SVScorePairAltProcessor::
+testFragOverlap(
+    const int fragBeginRefPos,
+    const int fragEndRefPos) const
+{
+    const pos_t fragOverlap(std::min((1+svParams.centerPosA-fragBeginRefPos), (fragEndRefPos-svParams.centerPosB))); // smaller of the left and right flank lengths
+#ifdef DEBUG_MEGAPAIR
+    log_os << __FUNCTION__ << ": frag begin/end/overlap: " << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n";
+#endif
+    return (fragOverlap >= pairOpt.minFragSupport);
+}
+
+
+
+bool
+SVScorePairAltProcessor::
+realignPairedRead(
+    const bam_record& bamRead,
+    const bool isLeftOfInsert,
+    const std::string& floatRead,
+    const pos_t anchorPos,
+    int& altTemplateSize)
+{
+    // TODO: basecall qualities??
+
+    // sanity check whether we should even start alignment -- check 'left of insert' consistency
+    // (a left anchor must start before the sv segment begins; for a right anchor, anchorPos+floatRead.size() must reach past the segment end)
+    if (isLeftOfInsert)
+    {
+        if (anchorPos >= _contig.segmentSpan.begin_pos()) return false;
+    }
+    else
+    {
+        const pos_t endPos(anchorPos + floatRead.size());
+        if (endPos <= _contig.segmentSpan.end_pos()) return false;
+    }
+
+    AlignmentResult<int> readAlignment;
+    // iterator ranges over the read and over the (possibly truncated, see below) extended contig:
+    typedef std::string::const_iterator siter;
+    const siter readBegin(floatRead.begin());
+    const siter readEnd(floatRead.end());
+    siter contigBegin(_contig.extSeq.begin());
+    siter contigEnd(_contig.extSeq.end());
+
+    // if the insertion is not fully assembled, align to only part of the contig:
+    int contigBeginOffset(0);
+    if (sv.isUnknownSizeInsertion)
+    {
+        // TODO check these results in test case:
+        if (isLeftOfInsert)
+        {
+            contigEnd = contigBegin + _contig.bpAOffset.begin_pos() + sv.unknownSizeInsertionLeftSeq.size();
+        }
+        else
+        {
+            contigBeginOffset = static_cast<int>(_contig.bpBOffset.begin_pos()) - sv.unknownSizeInsertionRightSeq.size();
+            assert(contigBeginOffset>=0);
+            contigBegin = contigBegin + contigBeginOffset;
+        }
+    }
+
+    _shadowAligner.align(
+        readBegin, readEnd,
+        contigBegin, contigEnd,
+        readAlignment);
+
+    //
+    // first determine if the read meets some minimal quality criteria
+    //
+
+    // require the complete alignment score to be some percentage of optimal after trimming off any expected softclip
+    {
+        using namespace ALIGNPATH;
+
+        const path_t readPath(readAlignment.align.apath);
+
+        const unsigned readSize(floatRead.size());
+        unsigned clipSize(0);
+        // soft-clip is only expected on the side of the read facing the unassembled part of the insertion:
+        if (sv.isUnknownSizeInsertion)
+        {
+            if (isLeftOfInsert)
+            {
+                clipSize=apath_soft_clip_trail_size(readPath);
+            }
+            else
+            {
+                clipSize=apath_soft_clip_lead_size(readPath);
+            }
+        }
+
+#ifdef DEBUG_SHADOW
+        log_os << __FUNCTION__ << ": alignment: " << readPath << " clipSize: " << clipSize << "\n";
+#endif
+
+        assert(clipSize <= readSize);
+
+        const unsigned clippedReadSize(readSize-clipSize);
+        // reject reads with fewer than minAlignReadLength bases left after removing the expected soft-clip:
+        static const unsigned minAlignReadLength(40);
+        if (clippedReadSize < minAlignReadLength)
+        {
+            return false;
+        }
+
+        int nonClipScore(_shadowAligner.getPathScore(readPath, false));
+        // the optimal score is that of a read matching at every non-clipped base:
+        static const float minScoreFrac(0.85f);
+        const int optimalScore(clippedReadSize*_shadowAligner.getScores().match);
+
+        const float scoreFrac(static_cast<float>(nonClipScore)/static_cast<float>(optimalScore));
+
+#ifdef DEBUG_SHADOW
+        log_os << __FUNCTION__ << ": optScore: " << optimalScore
+               << " nonClipScore: " << nonClipScore
+               << " scoreFrac: " << scoreFrac << "\n";
+#endif
+
+        if (scoreFrac < minScoreFrac)
+        {
+            return false;
+        }
+    }
+
+    //
+    // next determine what the altTemplateSize is if we believe the alignment
+    //
+
+    known_pos_range2 fakeRefSpan;
+    if (isLeftOfInsert)
+    {
+        fakeRefSpan.set_begin_pos(anchorPos);
+
+        // offset of read end on the contig, in contig coordinates
+        const unsigned shadowRefSpan(apath_ref_length(readAlignment.align.apath));
+        const int readContigEndOffset(contigBeginOffset + readAlignment.align.beginPos + shadowRefSpan);
+
+        // translate contig coordinates to fake reference coordinates:
+        if (readContigEndOffset < _contig.bpAOffset.begin_pos())
+        {
+            // definitely does not meet the breakend overlap criteria:
+            return false;
+        }
+
+        /// set fake end as if the insert allele continued in reference coordinates
+        const int readContigEndRefOffset(_contig.segmentSpan.begin_pos() + (readContigEndOffset - _contig.bpAOffset.begin_pos()));
+        fakeRefSpan.set_end_pos(readContigEndRefOffset);
+
+#ifdef DEBUG_SHADOW
+        log_os << __FUNCTION__ << ": fakeRefSpan: " << fakeRefSpan
+               << " shadowRefSpan: " << shadowRefSpan
+               << " contigBeginOffset: " << contigBeginOffset
+               << " alignBeginPos: " << readAlignment.align.beginPos
+               << " readContigEndOffset: " << readContigEndOffset
+               << "\n";
+#endif
+    }
+    else
+    {
+        // approximate mate as having conventional alignment -- we could fix
+        // this with some buffering in ShadowReadFinder
+        fakeRefSpan.set_end_pos(anchorPos + floatRead.size());
+
+        // set fake begin as if the insert allele continued TO THE LEFT in reference coordinates:
+
+        // offset of read begin on the contig, in contig coordinates:
+        const int readContigBeginOffset(contigBeginOffset + readAlignment.align.beginPos);
+
+        // translate contig coordinates to fake reference coordinates:
+        if (readContigBeginOffset > _contig.bpBOffset.begin_pos())
+        {
+            // definitely does not meet the breakend overlap criteria:
+            return false;
+        }
+
+        const int readContigBeginRefOffset(_contig.segmentSpan.end_pos()-(_contig.bpBOffset.begin_pos()-readContigBeginOffset));
+
+        fakeRefSpan.set_begin_pos(readContigBeginRefOffset);
+#ifdef DEBUG_SHADOW
+        log_os << __FUNCTION__ << ": fakeRefSpan: " << fakeRefSpan
+               << " contigBeginOffset: " << contigBeginOffset
+               << " alignBeginPos: " << readAlignment.align.beginPos
+               << " readContigBeginOffset: " << readContigBeginOffset
+               << "\n";
+#endif
+    }
+    // an inverted span is treated as a logic error; bamRead is only used here to label the message:
+    if (fakeRefSpan.begin_pos() > fakeRefSpan.end_pos())
+    {
+        using namespace illumina::common;
+
+        std::ostringstream oss;
+        oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fakeRefSpan << " bamRecord: " << bamRead << "\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+    // note that altTemplateSize is written even if the overlap test below fails:
+    altTemplateSize=fakeRefSpan.size();
+
+    //
+    // finally determine if we cross a breakend boundary
+    //
+    if (! testFragOverlap(fakeRefSpan.begin_pos(), fakeRefSpan.end_pos())) return false;
+
+    // made it!
+#ifdef DEBUG_SHADOW
+    log_os << __FUNCTION__ << ": shadow passed pair tests\n";
+#endif
+
+    return true;
+}
+
+
+
+bool
+SVScorePairAltProcessor::
+alignShadowRead(
+    const bam_record& bamRead,
+    int& altTemplateSize)
+{
+    // TODO: basecall qualities??
+
+    // does the shadow occur to the left or right of the insertion? (decided by the strand of its mate)
+    const bool isLeftOfInsert(bamRead.is_mate_fwd_strand());
+#ifdef DEBUG_SHADOW
+    log_os << __FUNCTION__ << ": isLeftOfInsert: " << isLeftOfInsert << "\n";
+#endif
+
+    // do we need to revcomp the sequence? (done only when the mate is on the forward strand)
+    std::string shadowRead(bamRead.get_bam_read().get_string());
+    if (isLeftOfInsert)
+    {
+        reverseCompStr(shadowRead);
+    }
+    // the mate is the anchor of the pair, so mate_pos()-1 is passed on as the anchor position:
+    const pos_t matePos(bamRead.mate_pos() -1);
+
+    return realignPairedRead(bamRead, isLeftOfInsert, shadowRead, matePos, altTemplateSize);
+}
+
+
+
+void
+SVScorePairAltProcessor::
+processClearedRecord(
+    const SVId& svId,
+    const bam_record& bamRead,
+    SupportFragments& svSupportFrags)
+{
+    using namespace illumina::common;
+
+    assert(bamParams.isSet);
+    // skip reads whose start position (pos()-1) does not intersect bamParams.interval:
+    const pos_t refPos(bamRead.pos()-1);
+    if (! bamParams.interval.range.is_pos_intersect(refPos)) return;
+
+    // many special rules applied for large insertions:
+    const bool isLargeInsert(isLargeInsertSV(sv));
+
+    bool isShadowAlignment(false);
+    bool isRepeatChimeraAlignment(false);
+
+    int templateSize(0);
+    int altTemplateSize(0);
+
+    if (isLargeInsert)
+    {
+        // test for shadow
+        {
+            const bool isShadowRead(_shadow.check(bamRead));
+
+            if (isShadowRead)
+            {
+
+                // does the shadow occur to the left or right of the insertion?
+                const bool isLeftOfInsert(bamRead.is_mate_fwd_strand());
+
+                // eval left of insert for Bp1 and right of insert for Bp2:
+                if (isLeftOfInsert != isBp1)
+                {
+#ifdef DEBUG_SHADOW
+                    log_os << __FUNCTION__ << ": shadow WEREWOLF isLeft: "  << isLeftOfInsert << " " << isBp1 << "\n";
+#endif
+                    return;
+                }
+
+                isShadowAlignment=alignShadowRead(bamRead,altTemplateSize);
+
+                if (! isShadowAlignment) return;
+#ifdef DEBUG_SHADOW
+                log_os << __FUNCTION__ << ": read passed shadow test altsize/record: "  << altTemplateSize << "/" << bamRead << "\n";
+#endif
+            }
+            else
+            {
+                // record the mapq value of the shadow mate:
+                if (_shadow.isShadowMate())
+                {
+                    SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]);
+                    SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first()));
+                    setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadowAlignment, evRead);
+                }
+
+                // ok, not a shadow read, kick the read out if it fits shadow or shadow-mate criteria:
+                if (bamRead.is_unmapped() || (bamRead.is_paired() && bamRead.is_mate_unmapped())) return;
+            }
+        }
+
+        // test for MAPQ0 pair
+        {
+            typedef RemoteReadCache::const_iterator riter;
+            const RemoteReadCache& remotes(assemblyData.remoteReads);
+            riter remoteIter(remotes.find(bamRead.qname()));
+
+            if (remoteIter != remotes.end())
+            {
+                if (remoteIter->second.readNo != ( (bamRead.read_no() == 1) ? 2 : 1 )) return; // the cached remote read must be the mate of this read
+
+                const bool isLeftOfInsert(bamRead.is_fwd_strand());
+
+                // eval left of insert for Bp1 and right of insert for Bp2:
+                if (isLeftOfInsert != isBp1)
+                {
+#ifdef DEBUG_SHADOW
+                    log_os << __FUNCTION__ << ": chimera WEREWOLF isLeft: "  << isLeftOfInsert << " " << isBp1 << "\n";
+#endif
+                    return;
+                }
+
+                const pos_t anchorPos(bamRead.pos()-1);
+                const std::string& remoteRead(remoteIter->second.readSeq); /// read is already revcomped as required when stored in cache
+                isRepeatChimeraAlignment=realignPairedRead(bamRead, isLeftOfInsert, remoteRead, anchorPos, altTemplateSize);
+
+                if (! isRepeatChimeraAlignment) return;
+            }
+            else
+            {
+                /// if we establish it's not a repeat chimera, then filter back down to the usual pair candidates:
+                if (! (bamRead.is_unmapped() || bamRead.is_mate_unmapped()))
+                {
+                    if (! is_innie_pair(bamRead)) return;
+                }
+            }
+        }
+    }
+
+    const bool isRealignedTemplate(isLargeInsert && (isShadowAlignment || isRepeatChimeraAlignment));
+
+#ifdef DEBUG_MEGAPAIR
+    log_os << __FUNCTION__ << ": read: " << bamRead << "\n";
+#endif
+
+    /// check if fragment is too big or too small (realigned templates keep isAnomTemplate=true, so the alt size range check below always applies to them):
+    bool isAnomTemplate(true);
+    if (! isRealignedTemplate)
+    {
+        templateSize=(std::abs(bamRead.template_size()));
+        altTemplateSize=(templateSize-svParams.altShift);
+
+        isAnomTemplate=((templateSize < bamParams.minFrag) || (templateSize > bamParams.maxFrag));
+    }
+
+#ifdef DEBUG_MEGAPAIR
+    log_os << __FUNCTION__ << ": tSize/aSize: " << templateSize << " " << altTemplateSize << "\n";
+#endif
+
+    // only filter out anomalous fragments for alt if the ref is also being filtered out:
+    //  (if we don't do this there will be a frag prob for ref and a zero for alt, leading to skewed results)
+    if (isAnomTemplate)
+    {
+        if (altTemplateSize < bamParams.minFrag)
+        {
+#ifdef DEBUG_MEGAPAIR
+            log_os << __FUNCTION__ << ": altsize below min\n";
+#endif
+            return;
+        }
+        if (altTemplateSize > bamParams.maxFrag)
+        {
+#ifdef DEBUG_MEGAPAIR
+            log_os << __FUNCTION__ << ": altsize above max\n";
+#endif
+            return;
+        }
+    }
+
+    // get fragment range and check overlap with breakend:
+    if (! isRealignedTemplate)
+    {
+        // the fragment begins at this read's position when isFirstRead() is true, otherwise at the mate's position:
+        const bool isFirstBamRead(isFirstRead(bamRead));
+
+        pos_t fragBeginRefPos(refPos);
+        if (! isFirstBamRead)
+        {
+            fragBeginRefPos=bamRead.mate_pos()-1;
+        }
+
+        const pos_t fragEndRefPos(fragBeginRefPos+templateSize);
+
+        if (fragBeginRefPos > fragEndRefPos)
+        {
+            std::ostringstream oss;
+            oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fragBeginRefPos << " " << fragEndRefPos << " bamRecord: " << bamRead << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+
+        if (! testFragOverlap(fragBeginRefPos, fragEndRefPos)) return;
+    }
+    // the read passed all filters: record its evidence under the fragment keyed by read name:
+    SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]);
+
+    SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first()));
+
+    const unsigned readSize(bamRead.read_size());
+    unsigned mapq(bamRead.map_qual());
+    if (isShadowAlignment)
+    {
+        mapq=_shadow.getMateMapq(); // use the mate mapq tracked by the shadow finder instead of this read's own value
+    }
+    setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, mapq, readSize, isRealignedTemplate, evRead);
+
+    if (isRepeatChimeraAlignment)
+    {
+        // enter the mate read, reusing the mapq and read size of the anchor read:
+        SVFragmentEvidenceRead& evMateRead(fragment.getRead(! bamRead.is_first()));
+        setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, mapq, readSize, isRealignedTemplate, evMateRead);
+    }
+
+    SVFragmentEvidenceAlleleBreakend& svAltBp(fragment.alt.getBp(isBp1));
+    setAlleleFrag(*bamParams.fragDistroPtr, altTemplateSize, svAltBp, isLargeInsert);
+#ifdef DEBUG_MEGAPAIR
+    log_os << __FUNCTION__ << ": altset: " << svAltBp << "\n";
+#endif
+
+    if (fragment.isAltSpanningPairSupport())
+    {
+        SupportFragment& supportFrag(svSupportFrags.getSupportFragment(bamRead));
+        supportFrag.addSpanningSupport(svId.localId);
+#ifdef DEBUG_SUPPORT
+        log_os << __FUNCTION__ << "  Adding read support (spanning): "
+               << bamRead.qname() << "\t" << supportFrag;
+#endif
+    }
+
+    if (! isRealignedTemplate)
+    {
+        // when an alt entry is made for a fragment, we try to always create corresponding ref entry
+        // in theory this will get picked up by the ref scanner anyway, but the cost of missing this
+        // is all sorts of really bad somatic FNs
+        setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1), isLargeInsert);
+    }
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorePairAltProcessor.hh b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairAltProcessor.hh
new file mode 100644
index 0000000..8159ac1
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairAltProcessor.hh
@@ -0,0 +1,137 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "SVScorePairProcessor.hh"
+
+#include "manta/ShadowReadFinder.hh"
+#include "manta/SVCandidateAssemblyData.hh"
+#include "options/ReadScannerOptions.hh"
+#include "options/SVRefinerOptions.hh"
+
+
+struct ContigParams
+{
+    ContigParams(
+        const SVCandidateAssemblyData& assemblyData,
+        const SVCandidate& sv);
+
+    /// extended contig (a reference into assemblyData.extendedContigs, so assemblyData must outlive this object):
+    const std::string& extSeq;
+
+    /// where does the sv segment begin,end in reference coordinates?:
+    known_pos_range2 segmentSpan;
+    /// offsets of the lower (A) and higher (B) breakend in extended contig coordinates; each range end adds the breakend homology length:
+    known_pos_range2 bpAOffset;
+    known_pos_range2 bpBOffset;
+};
+
+
+/// estimate pair support for an sv candidate
+/// restricted to simple indel style svs
+struct SVScorePairAltProcessor : public SVScorePairProcessor
+{
+    SVScorePairAltProcessor(
+        const ReadScannerOptions& scanOpt,
+        const SVRefinerOptions& refineOpt,
+        const std::vector<bool>& initIsAlignmentTumor,
+        const SVLocusScanner& initReadScanner,
+        const PairOptions& initPairOpt,
+        const SVCandidateAssemblyData& initAssemblyData,
+        const SVCandidate& initSv,
+        const bool initIsBp1,
+        SVEvidence& initEvidence) :
+        SVScorePairProcessor(initIsAlignmentTumor, initReadScanner, initPairOpt, initSv, initIsBp1, initEvidence),
+        assemblyData(initAssemblyData),
+        _shadowAligner(refineOpt.spanningAlignScores),
+        _shadow(scanOpt.minSingletonMapqCandidates,
+                (! initIsBp1), /// search for left-open shadows
+                (  initIsBp1)), /// search for right-open shadows
+        _contig(initAssemblyData,initSv)
+    {
+        checkInput(sv);
+    }
+
+    /// what to skip in addition to the core skip test?
+    ///
+    /// override to allow for shadow and chimera re-maps for large insertions:
+    /// (for large insertions only unpaired reads and pairs with both reads unmapped are skipped)
+    virtual
+    bool
+    isSkipRecord(
+        const bam_record& bamRead)
+    {
+        if (! isLargeInsertSV(sv)) return SVScorePairProcessor::isSkipRecord(bamRead);
+
+        if (! bamRead.is_paired()) return true;
+        else if (bamRead.is_unmapped() && bamRead.is_mate_unmapped()) return true;
+        return false;
+    }
+    /// evaluate one bam record as alt-allele pair evidence for this breakend, updating the evidence object and svSupportFrags
+    void
+    processClearedRecord(
+        const SVId& svId,
+        const bam_record& bamRead,
+        SupportFragments& svSupportFrags);
+
+private:
+    static
+    void
+    checkInput(
+        const SVCandidate& sv);
+
+    /// \param[in] bamRead bam record used for error/debug printout only
+    /// \param[in] isLeftOfInsert is the anchor on the left or right side of the insertion
+    /// \param[in] floatRead the read to be realigned, already revcomped to expected orientation
+    /// \param[in] anchorPos the alignment position of the anchoring (ie. non-realigned) read of the pair
+    /// \param[out] altTemplateSize template size implied by the realignment (may be written even when false is returned)
+    /// \return true for usable alignment
+    bool
+    realignPairedRead(
+        const bam_record& bamRead,
+        const bool isLeftOfInsert,
+        const std::string& floatRead,
+        const pos_t anchorPos,
+        int& altTemplateSize);
+    /// realign a shadow read through realignPairedRead, using its mate as the anchor
+    bool
+    alignShadowRead(
+        const bam_record& bamRead,
+        int& altTemplateSize);
+
+    /// test whether a frag reference span provides sufficient support for a breakpoint of this sv:
+    bool
+    testFragOverlap(
+        const int fragBeginRefPos,
+        const int fragEndRefPos) const;
+
+    ///////////////////////
+    const SVCandidateAssemblyData& assemblyData;
+
+    const GlobalAligner<int> _shadowAligner;
+    ShadowReadFinder _shadow;
+
+    ContigParams _contig;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorePairProcessor.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairProcessor.cpp
new file mode 100644
index 0000000..916cb70
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairProcessor.cpp
@@ -0,0 +1,91 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SVScorePairProcessor.hh"
+
+
+
+/// Derive the per-candidate scoring parameters which do not depend on the bam file being scanned.
+///
+/// \param[in] readScanner source of the mapping quality thresholds
+/// \param[in] sv the candidate whose breakends and insert sequence are summarized
+/// \param[in] isBp1 select breakend 1 (true) or breakend 2 (false) as the breakend being scored
+SVScorePairInitParams::
+SVScorePairInitParams(
+    const SVLocusScanner& readScanner,
+    const SVCandidate& sv,
+    const bool isBp1)
+{
+    // In case of breakend homology, approximate each breakend as a point event at the center
+    // of its possible range:
+    const pos_t bp1Center(sv.bp1.interval.range.center_pos());
+    const pos_t bp2Center(sv.bp2.interval.range.center_pos());
+
+    // center of the breakend being scored:
+    if (isBp1)
+    {
+        centerPos = bp1Center;
+    }
+    else
+    {
+        centerPos = bp2Center;
+    }
+
+    // A/B hold the two centers in ascending order (ties keep breakend 1 as A):
+    if (bp1Center <= bp2Center)
+    {
+        centerPosA = bp1Center;
+        centerPosB = bp2Center;
+    }
+    else
+    {
+        centerPosA = bp2Center;
+        centerPosB = bp1Center;
+    }
+
+    // inserted sequence length; for insertions of unknown size only the assembled left and
+    // right flanks are counted:
+    const int insertLength(
+        sv.isUnknownSizeInsertion ?
+        static_cast<int>(sv.unknownSizeInsertionLeftSeq.size() + sv.unknownSizeInsertionRightSeq.size()) :
+        static_cast<int>(sv.insertSeq.size()));
+
+    // total impact of the alt allele on template size, assuming a simple indel:
+    altShift = ((centerPosB-centerPosA)-insertLength);
+
+    minMapQ = readScanner.getMinMapQ();
+    minTier2MapQ = readScanner.getMinTier2MapQ();
+}
+
+
+
+/// Prepare the processor to scan the bam file at \p bamIndex.
+///
+/// Records the bam index and its tumor flag, caches the evidence fragment size range and the
+/// fragment size distribution reported by the read scanner for this bam, and computes the genome
+/// interval to scan around the center position of the breakend being scored.
+///
+/// \param[in] bamIndex index of the bam file to process next; used to index isAlignmentTumor and
+///                     to query the read scanner
+///
+/// \return the interval to scan, as a reference to internal state which is overwritten by the next call
+const GenomeInterval&
+SVScorePairProcessor::
+nextBamIndex(
+    const unsigned bamIndex)
+{
+    bamParams.isSet = true;
+    bamParams.bamIndex = bamIndex;
+    bamParams.isTumor = (isAlignmentTumor[bamIndex]);
+
+    // set the search range around centerPos so that we can get any fragments at the Xth percentile
+    // length or smaller which could still reach minFragSupport
+    const SVLocusScanner::Range& pRange(readScanner.getEvidencePairRange(bamIndex));
+    bamParams.minFrag = (static_cast<pos_t>(pRange.min));
+    bamParams.maxFrag = (static_cast<pos_t>(pRange.max));
+
+    const pos_t maxSupportedFrag(bamParams.maxFrag-pairOpt.minFragSupport);
+
+    const pos_t beginPos(svParams.centerPos-maxSupportedFrag);
+    const pos_t endPos(svParams.centerPos+maxSupportedFrag+1);
+#ifdef DEBUG_MEGAPAIR
+    log_os << __FUNCTION__ << ": pair scan begin/end: " << beginPos << " " << endPos << "\n";
+#endif
+
+    bamParams.fragDistroPtr = &(readScanner.getFragSizeDistro(bamIndex));
+
+    // set bam stream to new search interval:
+    // (only the chromosome id of the breakend is read, so bind it by reference instead of copying it)
+    const SVBreakend& bp( isBp1 ? sv.bp1 : sv.bp2 );
+    bamParams.interval = GenomeInterval(bp.interval.tid, beginPos, endPos);
+
+    return bamParams.interval;
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorePairProcessor.hh b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairProcessor.hh
new file mode 100644
index 0000000..a3b251d
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairProcessor.hh
@@ -0,0 +1,180 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "SVEvidence.hh"
+#include "SVScorerPairOptions.hh"
+#include "SVScorerShared.hh"
+#include "SVSupports.hh"
+
+#include "blt_util/SizeDistribution.hh"
+
+#include "manta/BamRegionProcessor.hh"
+#include "manta/SVCandidate.hh"
+#include "manta/SVLocusScanner.hh"
+#include "manta/JunctionIdGenerator.hh"
+
+
+/// Scoring parameters derived once per (SV candidate, breakend) combination; these do not depend
+/// on which bam file is being scanned.
+struct SVScorePairInitParams
+{
+    /// \param[in] readScanner source of the mapping quality thresholds
+    /// \param[in] sv candidate whose breakend centers and insert size are summarized
+    /// \param[in] isBp1 true to use breakend 1 as the scored breakend, false for breakend 2
+    SVScorePairInitParams(
+        const SVLocusScanner& readScanner,
+        const SVCandidate& sv,
+        const bool isBp1);
+
+    // lower of the two breakend center positions:
+    pos_t centerPosA;
+    // higher of the two breakend center positions:
+    pos_t centerPosB;
+    // center position of the breakend selected by isBp1:
+    pos_t centerPos;
+
+    // total impact of the alt allele on template size:
+    // (distance between the two breakend centers minus the inserted sequence length)
+    pos_t altShift;
+    // mapping quality threshold taken from the read scanner:
+    unsigned minMapQ;
+    unsigned minTier2MapQ; // a second, lower mapq threshold used to disprove a somatic allele during tumor/normal calling
+};
+
+
+/// Per-bam scanning state, refreshed by SVScorePairProcessor::nextBamIndex() each time a new
+/// bam file is selected.
+struct SVScorePairBamParams
+{
+    // false until nextBamIndex() has been called at least once:
+    bool isSet = false;
+    // index of the bam file currently being scanned:
+    unsigned bamIndex = 0;
+    // tumor flag of the current bam file:
+    bool isTumor = false;
+    // bounds of the evidence fragment size range reported by the read scanner for this bam:
+    pos_t minFrag = 0;
+    pos_t maxFrag = 0;
+    // fragment size distribution of this bam; points at an object owned by the read scanner:
+    const SizeDistribution* fragDistroPtr = nullptr;
+    // genome interval to scan for this bam:
+    GenomeInterval interval;
+};
+
+
+/// Shared base for processors which scan bam records around one breakend of an SV candidate and
+/// accumulate fragment (read pair) evidence for it.
+///
+/// Intended call sequence, going by the commented-out processRecord() below: select a bam file
+/// with nextBamIndex(), drop records for which isSkipRecordCore() or isSkipRecord() is true, and
+/// pass the rest to processClearedRecord(), which derived classes must implement.
+struct SVScorePairProcessor : public BamRegionProcessor
+{
+    /// All reference arguments are stored as references and must outlive this object;
+    /// initIsAlignmentTumor is copied.
+    SVScorePairProcessor(
+        const std::vector<bool>& initIsAlignmentTumor,
+        const SVLocusScanner& initReadScanner,
+        const PairOptions& initPairOpt,
+        const SVCandidate& initSv,
+        const bool initIsBp1,
+        SVEvidence& initEvidence) :
+        isAlignmentTumor(initIsAlignmentTumor),
+        readScanner(initReadScanner),
+        pairOpt(initPairOpt),
+        sv(initSv),
+        isBp1(initIsBp1),
+        evidence(initEvidence),
+        svParams(readScanner, sv, isBp1),
+        bamParams()
+    {}
+
+    /// Select the bam file to scan next and update the per-bam state.
+    ///
+    /// \return the genome interval to scan for this bam
+    const GenomeInterval&
+    nextBamIndex(
+        const unsigned bamIndex);
+
+    /*
+    void
+    processRecord(
+            const bam_record& bamRead)
+    {
+        if (isSkipRecordCore(bamRead)) return;
+        if (isSkipRecord(bamRead)) return;
+
+        processClearedRecord(bamRead);
+    }
+    */
+
+    // alternate interface
+    /// Core filter applied to every record regardless of processor type.
+    ///
+    /// \return true if the read fails the scanner's core filter or is a non-strict supplement
+    static
+    bool
+    isSkipRecordCore(
+        const bam_record& bamRead)
+    {
+        return (SVLocusScanner::isReadFilteredCore(bamRead) || bamRead.isNonStrictSupplement());
+    }
+
+    /// what to skip in addition to the core skip test?
+    ///
+    /// Default: skip unmapped reads, paired reads with an unmapped mate, and reads which are not
+    /// part of an innie pair.
+    virtual
+    bool
+    isSkipRecord(
+        const bam_record& bamRead)
+    {
+        if (bamRead.is_unmapped() || (bamRead.is_paired() && bamRead.is_mate_unmapped())) return true;
+        else if (! is_innie_pair(bamRead)) return true;
+        return false;
+    }
+
+    // process a record for which isSkipRecord() == false
+    virtual
+    void
+    processClearedRecord(
+        const SVId& svId,
+        const bam_record& bamRead,
+        SupportFragments& svSupportFrags) = 0;
+
+    /// \return true if the candidate's insert sequence is at least 100 bases long
+    static
+    bool
+    isLargeInsertSV(
+        const SVCandidate& sv)
+    {
+        return (sv.insertSeq.size() >= 100 );
+    }
+
+protected:
+
+    /// Mark breakend \p bp as fragment-supported and record the fragment length probability.
+    ///
+    /// The probability is the smaller of cdf(size) and 1-cdf(size) under \p fragDistro.
+    /// The trailing (isPdf) argument is currently ignored: the pdf branch is compiled out
+    /// with '#if 0'.
+    static
+    void
+    setAlleleFrag(
+        const SizeDistribution& fragDistro,
+        const int size,
+        SVFragmentEvidenceAlleleBreakend& bp,
+        const bool /*isPdf*/ = false)
+    {
+        float fragProb(0);
+#if 0
+        if (isPdf)
+        {
+            fragProb = fragDistro.pdf(size);
+        }
+        else
+#endif
+        {
+            fragProb = fragDistro.cdf(size);
+            fragProb = std::min(fragProb, (1-fragProb));
+        }
+#ifdef DEBUG_MEGAPAIR
+        log_os << __FUNCTION__ << ": fraglen,prob " << size << " " << fragProb << "\n";
+#endif
+
+        bp.isFragmentSupport = true;
+        bp.fragLengthProb = fragProb;
+    }
+
+    // NOTE: svParams is built from readScanner, sv and isBp1 in the constructor's initializer
+    // list, and members initialize in declaration order, so those three must stay declared
+    // before svParams.
+
+    // per-bam tumor flags, indexed by bam index (stored by value):
+    const std::vector<bool> isAlignmentTumor;
+    const SVLocusScanner& readScanner;
+    const PairOptions& pairOpt;
+    // the candidate being scored:
+    const SVCandidate& sv;
+    // true if breakend 1 of sv is the one being scored, false for breakend 2:
+    const bool isBp1;
+    // output: evidence accumulated by processClearedRecord() implementations:
+    SVEvidence& evidence;
+
+    // parameters fixed for the lifetime of this processor:
+    const SVScorePairInitParams svParams;
+    // parameters refreshed by each nextBamIndex() call:
+    SVScorePairBamParams bamParams;
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorePairRefProcessor.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairRefProcessor.cpp
new file mode 100644
index 0000000..7f41163
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairRefProcessor.cpp
@@ -0,0 +1,107 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#include "SVScorePairRefProcessor.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/bam_record_util.hh"
+#include "manta/SVCandidateUtil.hh"
+
+#include <cassert>
+
+#include <sstream>
+
+
+/// standard debug output for this file:
+//#define DEBUG_PAIR
+
+/// ridiculous debug output for this file:
+//#define DEBUG_MEGAPAIR
+
+#ifdef DEBUG_PAIR
+#include "blt_util/log.hh"
+#endif
+
+
+
+/// Evaluate one bam record as reference allele fragment support for the breakend being scored.
+///
+/// The record is ignored unless its start position lies within the current scan interval, its
+/// absolute template size lies within [minFrag,maxFrag], and the fragment extends at least
+/// pairOpt.minFragSupport bases on both sides of the breakend center position. For accepted
+/// records, read-level evidence and the reference allele fragment length probability are stored
+/// in the evidence object under the read's qname.
+///
+/// The svId and svSupportFrags arguments are unused by this implementation.
+///
+/// \throws LogicException (via BOOST_THROW_EXCEPTION) if the computed fragment begin exceeds its end
+void
+SVScorePairRefProcessor::
+processClearedRecord(
+    const SVId& /*svId*/,
+    const bam_record& bamRead,
+    SupportFragments& /*svSupportFrags*/)
+{
+    using namespace illumina::common;
+
+    // nextBamIndex() must have been called before any record is processed:
+    assert(bamParams.isSet);
+
+    // presumably converts a 1-based bam position to a 0-based reference position - TODO confirm
+    const pos_t refPos(bamRead.pos()-1);
+    if (! bamParams.interval.range.is_pos_intersect(refPos)) return;
+
+    const bool isLargeInsert(isLargeInsertSV(sv));
+
+#ifdef DEBUG_MEGAPAIR
+    log_os << __FUNCTION__ << ": read: " << bamRead << "\n";
+#endif
+
+    /// check if fragment is too big or too small:
+    const int templateSize(std::abs(bamRead.template_size()));
+    if (templateSize < bamParams.minFrag) return;
+    if (templateSize > bamParams.maxFrag) return;
+
+    // count only from the down stream reads
+    // NOTE(review): isFirstRead() is defined elsewhere; it appears to report whether this read is
+    // the first one of the fragment, in which case its own position starts the fragment - confirm
+    const bool isFirstBamRead(isFirstRead(bamRead));
+
+    // get fragment range:
+    pos_t fragBeginRefPos(refPos);
+    if (! isFirstBamRead)
+    {
+        fragBeginRefPos=bamRead.mate_pos()-1;
+    }
+
+    const pos_t fragEndRefPos(fragBeginRefPos+templateSize);
+
+    // NOTE(review): templateSize is an absolute value, so this looks reachable only through
+    // arithmetic overflow - confirm
+    if (fragBeginRefPos > fragEndRefPos)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fragBeginRefPos << " " << fragEndRefPos << " bamRecord: " << bamRead << "\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    {
+        // smaller of the fragment's extent on either side of the breakend center:
+        const pos_t fragOverlap(std::min((1+svParams.centerPos-fragBeginRefPos), (fragEndRefPos-svParams.centerPos)));
+#ifdef DEBUG_MEGAPAIR
+        log_os << __FUNCTION__ << ": frag begin/end/overlap: " << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n";
+#endif
+        if (fragOverlap < pairOpt.minFragSupport) return;
+    }
+
+    // look up (operator[] - presumably creating if absent) the evidence entry keyed by read name:
+    SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]);
+
+    static const bool isShadow(false);
+
+    SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first()));
+    setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadow, evRead);
+
+    // isLargeInsert is passed as the isPdf argument of setAlleleFrag
+    setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1),isLargeInsert);
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorePairRefProcessor.hh b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairRefProcessor.hh
new file mode 100644
index 0000000..05a822b
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorePairRefProcessor.hh
@@ -0,0 +1,47 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "SVScorePairProcessor.hh"
+
+
+/// Pair processor which implements processClearedRecord() for the reference allele.
+struct SVScorePairRefProcessor : public SVScorePairProcessor
+{
+    /// Forwards all arguments unchanged to the SVScorePairProcessor constructor.
+    SVScorePairRefProcessor(
+        const std::vector<bool>& initIsAlignmentTumor,
+        const SVLocusScanner& initReadScanner,
+        const PairOptions& initPairOpt,
+        const SVCandidate& initSv,
+        const bool initIsBp1,
+        SVEvidence& initEvidence) :
+        SVScorePairProcessor(initIsAlignmentTumor, initReadScanner, initPairOpt, initSv, initIsBp1, initEvidence)
+    {}
+
+    /// Implementation of the pure virtual SVScorePairProcessor::processClearedRecord().
+    void
+    processClearedRecord(
+        const SVId& svId,
+        const bam_record& bamRead,
+        SupportFragments& svSupportFrags);
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorer.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorer.cpp
new file mode 100644
index 0000000..e28ed52
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorer.cpp
@@ -0,0 +1,1952 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#include "SVScorer.hh"
+#include "SVScorePairAltProcessor.hh"
+
+#include "blt_util/LinearScaler.hh"
+#include "blt_util/math_util.hh"
+#include "blt_util/prob_util.hh"
+#include "blt_util/qscore.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/bam_header_util.hh"
+#include "htsapi/bam_streamer.hh"
+#include "manta/ReadGroupStatsSet.hh"
+#include "manta/SVCandidateUtil.hh"
+
+#include <algorithm>
+#include <iostream>
+#include <string>
+
+
+//#define DEBUG_SCORE
+//#define DEBUG_SOMATIC_SCORE
+
+#if defined(DEBUG_SCORE) || defined(DEBUG_SOMATIC_SCORE)
+#define ANY_DEBUG_SCORE
+#endif
+
+#ifdef ANY_DEBUG_SCORE
+#include "blt_util/log.hh"
+#endif
+
+
+
+
+/// Set up the scorer: copy the option groups, build the depth filters, open one bam stream per
+/// alignment file, count the samples and collect one sample name per bam.
+///
+/// \param[in] opt program options (alignment files, tumor flags, scoring option groups)
+/// \param[in] readScanner read scanner kept for later evidence gathering
+/// \param[in] header bam header info passed to the depth filters
+SVScorer::
+SVScorer(
+    const GSCOptions& opt,
+    const SVLocusScanner& readScanner,
+    const bam_header_info& header) :
+    _isAlignmentTumor(opt.alignFileOpt.isAlignmentTumor),
+    _isRNA(opt.isRNA),
+    _callOpt(opt.callOpt),
+    _callDopt(_callOpt),
+    _diploidOpt(opt.diploidOpt),
+    _diploidDopt(_diploidOpt),
+    _scanOpt(opt.scanOpt),
+    _refineOpt(opt.refineOpt),
+    _somaticOpt(opt.somaticOpt),
+    _somaticDopt(_somaticOpt),
+    _tumorOpt(opt.tumorOpt),
+    _dFilterDiploid(opt.chromDepthFilename, _diploidOpt.maxDepthFactor, header),
+    _dFilterSomatic(opt.chromDepthFilename, _somaticOpt.maxDepthFactor, header),
+    _dFilterTumor(opt.chromDepthFilename, _tumorOpt.maxDepthFactor, header),
+    _readScanner(readScanner)
+{
+    // open one regionless bam stream per input alignment file, for use in the main analysis loop:
+    for (const std::string& alignmentPath : opt.alignFileOpt.alignmentFilename)
+    {
+        // hold the new stream in a named pointer rather than a shared_ptr temporary:
+        streamPtr stream(new bam_streamer(alignmentPath.c_str()));
+        _bamStreams.push_back(stream);
+    }
+
+    // tally all samples, and separately those which are not flagged as tumor:
+    _sampleCount=0;
+    _diploidSampleCount=0;
+    for (const bool isTumor : opt.alignFileOpt.isAlignmentTumor)
+    {
+        ++_sampleCount;
+        if (isTumor) continue;
+        ++_diploidSampleCount;
+    }
+
+    // collect one sample name per bam header (assuming 1 sample per bam for now):
+    unsigned sampleNumber(0);
+    for (const auto& stream : _bamStreams)
+    {
+        ++sampleNumber;
+
+        // default name handed to the header lookup: "SAMPLE" followed by the 1-based bam index
+        std::ostringstream fallbackName;
+        fallbackName << "SAMPLE" << sampleNumber;
+
+        const bam_hdr_t& bamHeader(stream->get_header());
+        std::string sampleName(get_bam_header_sample_name(bamHeader, fallbackName.str().c_str()));
+
+        // replace spaces in the sample name with underscores:
+        std::replace(sampleName.begin(), sampleName.end(), ' ', '_');
+        _sampleNames.push_back(sampleName);
+    }
+}
+
+
+
+/// Add the aligned bases of one bam record to a short-range, per-position depth vector.
+///
+/// Only alignment match segments contribute; positions outside of
+/// [beginPos, beginPos+depth.size()) are ignored.
+///
+/// \param[in] bamRead the alignment to add
+/// \param[in] beginPos this is the begin position of the range covered by the depth array
+/// \param[in,out] depth per-position depth counts, incremented in place
+///
+static
+void
+addReadToDepthEst(
+    const bam_record& bamRead,
+    const pos_t beginPos,
+    std::vector<unsigned>& depth)
+{
+    using namespace ALIGNPATH;
+
+    const pos_t endPos(beginPos+depth.size());
+
+    // convert the record's cigar into an alignment path:
+    path_t readPath;
+    bam_cigar_to_apath(bamRead.raw_cigar(), bamRead.n_cigar(), readPath);
+
+    // walk the path while tracking the reference position at the start of each segment:
+    pos_t segmentBeginPos(bamRead.pos()-1);
+    for (const path_segment& segment : readPath)
+    {
+        // nothing from here on can fall inside the depth range:
+        if (segmentBeginPos>=endPos) return;
+
+        if (is_segment_align_match(segment.type))
+        {
+            const pos_t segmentEndPos(segmentBeginPos+static_cast<pos_t>(segment.length));
+            for (pos_t pos(segmentBeginPos); pos < segmentEndPos; ++pos)
+            {
+                if (pos<beginPos) continue;
+                if (pos>=endPos) return;
+                depth[pos-beginPos]++;
+            }
+        }
+
+        if (is_segment_type_ref_length(segment.type)) segmentBeginPos += segment.length;
+    }
+}
+
+
+
+/// Estimate the maximum mapped read depth and the MAPQ0 read fraction in a small window around
+/// the center of a breakend.
+///
+/// Reads are pooled into one depth vector across all scanned bam files. Tumor bam files are
+/// skipped unless \p isTumorOnly is set.
+///
+/// \param[in] isTumorOnly if false, bam files flagged as tumor are not scanned
+/// \param[in] isMaxDepth if true, stop scanning as soon as the depth at a read's start position exceeds \p cutoffDepth
+/// \param[in] cutoffDepth depth threshold for the early stop (only used when \p isMaxDepth is true)
+/// \param[in] bp the breakend to evaluate
+/// \param[out] maxDepth maximum depth found over the window (0 if the window is empty)
+/// \param[out] MQ0Frac fraction of scanned mapped reads with mapping quality 0; left at 0 when fewer than 10 reads were scanned
+void
+SVScorer::
+getBreakendMaxMappedDepthAndMQ0(
+    const bool isTumorOnly,
+    const bool isMaxDepth,
+    const double cutoffDepth,
+    const SVBreakend& bp,
+    unsigned& maxDepth,
+    float& MQ0Frac)
+{
+    // define a new interval -/+ 50 bases around the center pos
+    // of the breakpoint
+    static const pos_t regionSize(50);
+
+    maxDepth=0;
+    MQ0Frac=0;
+
+    unsigned totalReads(0);
+    unsigned totalMQ0Reads(0);
+
+    // the window begin is clamped so that it cannot fall below position 0:
+    const pos_t centerPos(bp.interval.range.center_pos());
+    const known_pos_range2 searchRange(std::max((centerPos-regionSize),0), (centerPos+regionSize));
+
+    if (searchRange.size() == 0) return;
+
+    // one depth counter per position of the search window, shared by all scanned bam files:
+    std::vector<unsigned> depth(searchRange.size(),0);
+
+    bool isCutoff(false);
+    bool isBamFound(false);
+
+    const unsigned bamCount(_bamStreams.size());
+    for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+    {
+        if ((!isTumorOnly) && (_isAlignmentTumor[bamIndex])) continue;
+        isBamFound=true;
+
+        bam_streamer& bamStream(*_bamStreams[bamIndex]);
+
+        // set bam stream to new search interval:
+        bamStream.set_new_region(bp.interval.tid, searchRange.begin_pos(), searchRange.end_pos());
+
+        while (bamStream.next())
+        {
+            const bam_record& bamRead(*(bamStream.get_record_ptr()));
+
+            // turn filtration down to mapped only to match depth estimate method:
+            if (bamRead.is_unmapped()) continue;
+
+            // stop at the first read starting at or beyond the window end
+            // (assumes records arrive in position order - TODO confirm):
+            const pos_t refPos(bamRead.pos()-1);
+            if (refPos >= searchRange.end_pos()) break;
+
+            addReadToDepthEst(bamRead,searchRange.begin_pos(),depth);
+
+            totalReads++;
+            if (0 == bamRead.map_qual()) totalMQ0Reads++;
+
+            if (isMaxDepth)
+            {
+                // only the depth at this read's start position is tested against the cutoff;
+                // reads starting before the window begin are not tested:
+                const pos_t depthOffset(refPos-searchRange.begin_pos());
+                if (depthOffset>=0)
+                {
+                    if (depth[depthOffset] > cutoffDepth)
+                    {
+                        isCutoff=true;
+                        break;
+                    }
+                }
+            }
+        }
+
+        // once the cutoff is hit, the remaining bam files are not scanned:
+        if (isCutoff) break;
+    }
+
+    // at least one bam file must have passed the tumor filter above:
+    assert(isBamFound);
+
+    maxDepth = *(std::max_element(depth.begin(),depth.end()));
+    // the MQ0 fraction is only reported when at least 10 reads were scanned:
+    if (totalReads>=10)
+    {
+        MQ0Frac = static_cast<float>(totalMQ0Reads)/static_cast<float>(totalReads);
+    }
+}
+
+
+
+/// Convert a pair of log-likelihoods, in place, into probabilities normalized to sum to one.
+///
+/// \param[in,out] lower in: the smaller log-likelihood; out: its normalized probability
+/// \param[in,out] higher in: the larger log-likelihood; out: its normalized probability
+static
+void
+lnToProb(
+    float& lower,
+    float& higher)
+{
+    // likelihood ratio lower/higher, computed from the log difference:
+    const float ratio(std::exp(lower-higher));
+    const float norm(ratio+1);
+
+    higher = 1/norm;
+    lower = ratio/norm;
+}
+
+
+
+/// Get the ref and alt split read log-likelihoods of one read of a fragment, choosing for each
+/// allele which breakend's score to report.
+///
+/// \param[in] fragev evidence for the fragment
+/// \param[in] isRead1 evaluate read1 (true) or read2 (false) of the fragment
+/// \param[out] refLnLhood split read log-likelihood of the reference allele
+/// \param[out] altLnLhood split read log-likelihood of the alt allele
+/// \param[in] isForcedSupport if true, report likelihoods even if the read has no split support at either breakend
+///
+/// \return false if no split read support (cannot happen when \p isForcedSupport is true);
+///         in that case both outputs keep their initial value of 1
+static
+bool
+getSampleSplitReadLnLhood(
+    const SVFragmentEvidence& fragev,
+    const bool isRead1,
+    float& refLnLhood,
+    float& altLnLhood,
+    const bool isForcedSupport = false)
+{
+    refLnLhood = 1.;
+    altLnLhood = 1.;
+
+    // (bp1,bp2) split support flags for this read:
+    const std::pair<bool,bool> isBpSupport(fragev.isAnySplitReadSupport(isRead1));
+    if (! isForcedSupport)
+    {
+        if (! (isBpSupport.first || isBpSupport.second)) return false;
+    }
+
+    // default: use bp1 if it is supported, else bp2
+    bool isUseBp1Score(isBpSupport.first);
+
+    // alt allele: in forced mode, or when both/neither breakend is supported, use the breakend
+    // with the higher alt likelihood instead (ties go to bp1):
+    if (isForcedSupport || (isBpSupport.first == isBpSupport.second))
+    {
+        isUseBp1Score = (fragev.alt.bp1.getRead(isRead1).splitLnLhood >= fragev.alt.bp2.getRead(isRead1).splitLnLhood);
+    }
+
+    altLnLhood =
+        ( isUseBp1Score ?
+          fragev.alt.bp1.getRead(isRead1).splitLnLhood :
+          fragev.alt.bp2.getRead(isRead1).splitLnLhood);
+
+    // ref allele: the breakend choice is re-evaluated only when both breakends are supported;
+    // otherwise the choice made for the alt allele above carries over:
+    if (isBpSupport.first && isBpSupport.second)
+    {
+        isUseBp1Score = (fragev.ref.bp1.getRead(isRead1).splitLnLhood >= fragev.ref.bp2.getRead(isRead1).splitLnLhood);
+    }
+
+    refLnLhood =
+        ( isUseBp1Score ?
+          fragev.ref.bp1.getRead(isRead1).splitLnLhood :
+          fragev.ref.bp2.getRead(isRead1).splitLnLhood);
+
+    return true;
+}
+
+
+
+/// Count one read as confident split read support for the ref or alt allele, if the normalized
+/// probability of the favored allele exceeds a fixed threshold.
+///
+/// \param[in] fragev evidence for the fragment containing the read
+/// \param[in] isRead1 evaluate read1 (true) or read2 (false) of the fragment
+/// \param[in,out] sampleBaseInfo per-sample counters, incremented in place
+static
+void
+addConservativeSplitReadSupport(
+    const SVFragmentEvidence& fragev,
+    const bool isRead1,
+    SVSampleInfo& sampleBaseInfo)
+{
+    static const float splitSupportProb(0.999f);
+
+    // only consider reads where at least one allele and one breakend is confident
+    //
+    // ...note this is done in the absence of having a noise state in the model
+    //
+    float refLnLhood;
+    float altLnLhood;
+    const bool isSplitSupport(getSampleSplitReadLnLhood(fragev, isRead1, refLnLhood, altLnLhood));
+    if (! isSplitSupport) return;
+
+    if (altLnLhood > refLnLhood)
+    {
+        // alt is favored; after normalization altLnLhood holds P(alt):
+        lnToProb(refLnLhood, altLnLhood);
+        if (altLnLhood > splitSupportProb) sampleBaseInfo.alt.confidentSplitReadCount++;
+        return;
+    }
+
+    // ref is favored (or tied); after normalization refLnLhood holds P(ref):
+    lnToProb(altLnLhood, refLnLhood);
+    if (! (refLnLhood > splitSupportProb)) return;
+
+    sampleBaseInfo.ref.confidentSplitReadCount++;
+
+    // additionally record which reference breakend(s) the read supports:
+    if (fragev.ref.bp1.getRead(isRead1).isSplitSupport) sampleBaseInfo.ref.confidentSplitReadAndPairCountRefBp1++;
+    if (fragev.ref.bp2.getRead(isRead1).isSplitSupport) sampleBaseInfo.ref.confidentSplitReadAndPairCountRefBp2++;
+}
+
+
+
+/// Get the spanning pair likelihood of one allele.
+///
+/// \return the largest fragment length probability over the breakends which have fragment
+///         support (never less than 0), or 0 if neither breakend has support
+static
+float
+getSpanningPairAlleleLhood(
+    const SVFragmentEvidenceAllele& allele)
+{
+    // breakend 1 contributes its probability only when it has fragment support:
+    const float bp1Prob(allele.bp1.isFragmentSupport ? allele.bp1.fragLengthProb : 0.f);
+
+    if (! allele.bp2.isFragmentSupport) return bp1Prob;
+
+    return std::max(bp1Prob, allele.bp2.fragLengthProb);
+}
+
+/// Count a fragment as spanning pair support for each allele that has fragment support at
+/// either breakend. One fragment may be counted for both alleles.
+///
+/// \param[in] fragev evidence for the fragment
+/// \param[in,out] sampleBaseInfo per-sample counters, incremented in place
+static
+void
+addSpanningPairSupport(
+    const SVFragmentEvidence& fragev,
+    SVSampleInfo& sampleBaseInfo)
+{
+    const bool isAltSupport(fragev.alt.bp1.isFragmentSupport || fragev.alt.bp2.isFragmentSupport);
+    if (isAltSupport) sampleBaseInfo.alt.spanningPairCount++;
+
+    const bool isRefSupport(fragev.ref.bp1.isFragmentSupport || fragev.ref.bp2.isFragmentSupport);
+    if (isRefSupport) sampleBaseInfo.ref.spanningPairCount++;
+}
+
+/// Count a fragment as confident spanning pair support for the ref or alt allele, if the
+/// normalized likelihood of the favored allele exceeds a fixed threshold.
+///
+/// Every confident fragment increments the "semi-mapped" counter of its allele; the stricter
+/// confidentSpanningPairCount is incremented only if both reads of the fragment are observed
+/// anchors (tier1).
+///
+/// \param[in] fragev evidence for the fragment
+/// \param[in,out] sampleBaseInfo per-sample counters, incremented in place
+///
+/// \throws LogicException (via BOOST_THROW_EXCEPTION) if the fragment has pair support but both allele likelihoods are zero
+static
+void
+addConservativeSpanningPairSupport(
+    const SVFragmentEvidence& fragev,
+    SVSampleInfo& sampleBaseInfo)
+{
+    static const float pairSupportProb(0.9f);
+
+    if (! fragev.isAnySpanningPairSupport()) return;
+
+    float altLhood(getSpanningPairAlleleLhood(fragev.alt));
+    float refLhood(getSpanningPairAlleleLhood(fragev.ref));
+
+    assert(altLhood >= 0);
+    assert(refLhood >= 0);
+    if ((altLhood <= 0) && (refLhood <= 0))
+    {
+        using namespace illumina::common;
+
+        std::ostringstream oss;
+        oss << "ERROR: Spanning likelihood is zero for all alleles. Fragment: " << fragev << "\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    static const bool isTier2(false);
+    const bool isFullyMapped(fragev.read1.isObservedAnchor(isTier2) && fragev.read2.isObservedAnchor(isTier2));
+
+    // convert to normalized prob:
+    // (sum is positive here, because the all-zero case threw above)
+    const float sum(altLhood+refLhood);
+    if (altLhood > refLhood)
+    {
+        if ((altLhood/sum) > pairSupportProb)
+        {
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": semi-mapped alt pair support\n";
+#endif
+            sampleBaseInfo.alt.confidentSemiMappedSpanningPairCount++;
+            if (isFullyMapped)
+            {
+#ifdef DEBUG_SCORE
+                log_os << __FUNCTION__ << ": fully-mapped alt pair support\n";
+#endif
+                sampleBaseInfo.alt.confidentSpanningPairCount++;
+            }
+        }
+
+
+    }
+    else
+    {
+        // a tie also lands here, but its normalized value of 0.5 cannot pass the threshold
+        if ((refLhood/sum) > pairSupportProb)
+        {
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": semi-mapped ref pair support\n";
+#endif
+            sampleBaseInfo.ref.confidentSemiMappedSpanningPairCount++;
+            if (isFullyMapped)
+            {
+#ifdef DEBUG_SCORE
+                log_os << __FUNCTION__ << ": fully-mapped ref pair support\n";
+#endif
+                sampleBaseInfo.ref.confidentSpanningPairCount++;
+                // additionally record which reference breakend(s) the fragment supports:
+                if (fragev.ref.bp1.isFragmentSupport) sampleBaseInfo.ref.confidentSplitReadAndPairCountRefBp1++;
+                if (fragev.ref.bp2.isFragmentSupport) sampleBaseInfo.ref.confidentSplitReadAndPairCountRefBp2++;
+            }
+        }
+    }
+}
+
+
+
+/// Accumulate the split read and spanning pair support counts of every fragment of one sample.
+///
+/// \param[in] sampleEvidence all fragment evidence gathered for the sample
+/// \param[in,out] sampleBaseInfo per-sample counters, incremented in place
+static
+void
+getSampleCounts(
+    const SVEvidence::evidenceTrack_t& sampleEvidence,
+    SVSampleInfo& sampleBaseInfo)
+{
+    for (const auto& fragmentEntry : sampleEvidence)
+    {
+#ifdef DEBUG_SCORE
+        log_os << __FUNCTION__ << ": Counting read: " << fragmentEntry.first << "\n";
+#endif
+        const SVFragmentEvidence& fragmentEvidence(fragmentEntry.second);
+
+        // split read support is evaluated separately for read1 and read2 of the fragment:
+        static const bool isRead1(true);
+        addConservativeSplitReadSupport(fragmentEvidence, isRead1, sampleBaseInfo);
+        addConservativeSplitReadSupport(fragmentEvidence, (! isRead1), sampleBaseInfo);
+
+        // pair support is evaluated once per fragment:
+        addSpanningPairSupport(fragmentEvidence, sampleBaseInfo);
+        addConservativeSpanningPairSupport(fragmentEvidence, sampleBaseInfo);
+    }
+}
+
+
+
+/// Fill in, for every sample, the conservative counts of reads which support only one allele,
+/// ie. P ( allele | read ) is high.
+///
+/// \param[in] evidence per-sample fragment evidence
+/// \param[in,out] baseInfo score info whose per-sample counters are incremented in place; it must
+///                         hold the same number of samples as \p evidence
+static
+void
+getSVSupportSummary(
+    const SVEvidence& evidence,
+    SVScoreInfo& baseInfo)
+{
+    const unsigned sampleCount(baseInfo.samples.size());
+    assert(sampleCount == evidence.samples.size());
+
+    unsigned sampleIndex(0);
+    while (sampleIndex < sampleCount)
+    {
+        SVSampleInfo& sampleInfo(baseInfo.samples[sampleIndex]);
+        getSampleCounts(evidence.getSampleEvidence(sampleIndex), sampleInfo);
+        ++sampleIndex;
+    }
+}
+
+
+
+/// For each fragment of one sample, clear its pair support when one of its reads has split read
+/// evidence favoring the opposite allele from the one favored by its pair evidence.
+///
+/// \param[in] isFindAltPairConflict if false, fragments whose pair evidence favors the alt allele are left untouched
+/// \param[in,out] sampleEvidence fragment evidence of the sample, modified in place
+static
+void
+resolvePairSplitConflictsSample(
+    const bool isFindAltPairConflict,
+    SVEvidence::evidenceTrack_t& sampleEvidence)
+{
+    for (SVEvidence::evidenceTrack_t::value_type& val : sampleEvidence)
+    {
+#ifdef DEBUG_SCORE
+        log_os << __FUNCTION__ << ": conflict check for " << val.first << "\n";
+#endif
+        SVFragmentEvidence& fragev(val.second);
+
+        /// filtration scheme only works if there's pair and split support for the same fragment:
+        if (! fragev.isAnySpanningPairSupport()) continue;
+        //    if (! (fragev.isAnySplitReadSupport(true) || fragev.isAnySplitReadSupport(false))) continue;
+
+        // if there's a difference in fragment support for one "frag-favored" allele, then
+        // there must also be either neutral split support or split support in favor of the same allele
+
+        const float refPairLhood(getSpanningPairAlleleLhood(fragev.ref));
+        const float altPairLhood(getSpanningPairAlleleLhood(fragev.alt));
+
+        // with forced support the split likelihood lookup always returns true, so both
+        // isRead1Split and isRead2Split below are always true:
+        static const bool isForcedSupport(true);
+        float refSplitLnLhoodRead1;
+        float altSplitLnLhoodRead1;
+        const bool isRead1Split(getSampleSplitReadLnLhood(fragev, true, refSplitLnLhoodRead1, altSplitLnLhoodRead1, isForcedSupport));
+
+        float refSplitLnLhoodRead2;
+        float altSplitLnLhoodRead2;
+        const bool isRead2Split(getSampleSplitReadLnLhood(fragev, false, refSplitLnLhoodRead2, altSplitLnLhoodRead2, isForcedSupport));
+
+#ifdef DEBUG_SCORE
+        log_os << __FUNCTION__ << ": fragev " << fragev << "\n";
+        log_os << __FUNCTION__ << ": r1/r2 " << isRead1Split << " " << isRead2Split << "\n";
+#endif
+        // which allele the pair evidence favors; both are false on a tie:
+        const bool isRefPair(refPairLhood > altPairLhood);
+        const bool isAltPair(altPairLhood > refPairLhood);
+
+        if (isAltPair)
+        {
+            if (! isFindAltPairConflict) continue;
+        }
+
+        // read1: clear pair support if split evidence and pair evidence favor opposite alleles
+        if (isRead1Split)
+        {
+            if (altSplitLnLhoodRead1 > refSplitLnLhoodRead1)
+            {
+                if (isRefPair)
+                {
+#ifdef DEBUG_SCORE
+                    log_os << __FUNCTION__ << ": clearing alt1/ref\n";
+#endif
+                    fragev.clearPairSupport();
+                }
+            }
+            if (refSplitLnLhoodRead1 > altSplitLnLhoodRead1)
+            {
+                if (isAltPair)
+                {
+#ifdef DEBUG_SCORE
+                    log_os << __FUNCTION__ << ": clearing ref1/alt\n";
+#endif
+                    fragev.clearPairSupport();
+                }
+            }
+        }
+
+        // read2: same test as for read1
+        if (isRead2Split)
+        {
+            if (altSplitLnLhoodRead2 > refSplitLnLhoodRead2)
+            {
+                if (isRefPair)
+                {
+#ifdef DEBUG_SCORE
+                    log_os << __FUNCTION__ << ": clearing alt2/ref\n";
+#endif
+                    fragev.clearPairSupport();
+                }
+            }
+            if (refSplitLnLhoodRead2 > altSplitLnLhoodRead2)
+            {
+                if (isAltPair)
+                {
+#ifdef DEBUG_SCORE
+                    log_os << __FUNCTION__ << ": clearing ref2/alt\n";
+#endif
+                    fragev.clearPairSupport();
+                }
+            }
+        }
+    }
+}
+
+
+
+/// Run the per-sample pair/split conflict resolution over every sample of a precise SV candidate.
+///
+/// Pair support can be added in error: the fragment does span the breakpoint, but its alignment past the
+/// breakpoint is poor and is better in the other allele. This might be done more naturally during the pair
+/// computation, but all the information needed is added during the split routine, so it is checked here.
+///
+/// \param sv candidate under evaluation; nothing is done for imprecise candidates
+/// \param evidence per-sample fragment evidence, handed to the per-sample routine for in-place update
+static
+void
+resolvePairSplitConflicts(
+    const SVCandidate& sv,
+    SVEvidence& evidence)
+{
+    if (sv.isImprecise())
+    {
+        return;
+    }
+
+    // the alt-pair conflict search is only enabled for candidates with a center size up to this value:
+    static const pos_t maxAltPairConflictSearch(1000);
+    const bool isSearchAltPairConflict(sv.centerSize() <= maxAltPairConflictSearch);
+
+    const unsigned totalSamples(evidence.size());
+    unsigned sampleIndex(0);
+    while (sampleIndex < totalSamples)
+    {
+        resolvePairSplitConflictsSample(isSearchAltPairConflict, evidence.getSampleEvidence(sampleIndex));
+        ++sampleIndex;
+    }
+}
+
+
+
+/// shared information gathering steps of all scoring models
+///
+/// Estimates the mapped depth and MQ0 fraction at each breakend, then (unless the candidate is judged to be
+/// in an over-depth region) gathers paired-read and split-read support for the ref and alt alleles, resolves
+/// pair/split conflicts, and finally summarizes the evidence into \p baseInfo.
+///
+/// \param svData,assemblyData forwarded to the pair/split support routines
+/// \param isTumorOnly selects which depth filter decides whether max-depth filtering is active
+/// \param sv,svId the candidate being scored and its id
+/// \param baseInfo receives breakend depth/MQ0 values and the support summary
+/// \param evidence per-sample fragment evidence, filled in by the support routines
+/// \param svSupports forwarded to the pair/split support routines
+void
+SVScorer::
+scoreSV(
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& assemblyData,
+    const bool isTumorOnly,
+    const SVCandidate& sv,
+    const SVId& svId,
+    SVScoreInfo& baseInfo,
+    SVEvidence& evidence,
+    SupportSamples& svSupports)
+{
+    // at what factor above the maxDepth FILTER criteria do we stop enumerating scoring components?
+    static const unsigned cutoffDepthFactor(2);
+
+    const bool isMaxDepth(isTumorOnly ?
+                          _dFilterTumor.isMaxDepthFilter() :
+                          (_dFilterDiploid.isMaxDepthFilter() && _dFilterSomatic.isMaxDepthFilter()));
+
+    // cutoff depths stay at zero when max-depth filtering is not active:
+    double bp1CutoffDepth(0);
+    double bp2CutoffDepth(0);
+    if (isMaxDepth)
+    {
+        // NOTE(review): in tumor-only mode isMaxDepth comes from _dFilterTumor, yet the cutoffs below are still
+        // taken from _dFilterDiploid/_dFilterSomatic -- confirm this is intended.
+        const double bp1MaxMaxDepth(std::max(_dFilterDiploid.maxDepth(sv.bp1.interval.tid), _dFilterSomatic.maxDepth(sv.bp1.interval.tid)));
+        const double bp2MaxMaxDepth(std::max(_dFilterDiploid.maxDepth(sv.bp2.interval.tid), _dFilterSomatic.maxDepth(sv.bp2.interval.tid)));
+
+        bp1CutoffDepth = cutoffDepthFactor*bp1MaxMaxDepth;
+        bp2CutoffDepth = cutoffDepthFactor*bp2MaxMaxDepth;
+    }
+
+    // get breakend center_pos depth estimate:
+    getBreakendMaxMappedDepthAndMQ0(isTumorOnly, isMaxDepth, bp1CutoffDepth, sv.bp1, baseInfo.bp1MaxDepth, baseInfo.bp1MQ0Frac);
+    const bool isBp1OverDepth(baseInfo.bp1MaxDepth > bp1CutoffDepth);
+    // the bp2 depth lookup is skipped once bp1 alone already triggers the over-depth condition:
+    if (! (isMaxDepth && isBp1OverDepth))
+    {
+        getBreakendMaxMappedDepthAndMQ0(isTumorOnly, isMaxDepth, bp2CutoffDepth, sv.bp2, baseInfo.bp2MaxDepth, baseInfo.bp2MQ0Frac);
+    }
+    const bool isBp2OverDepth(baseInfo.bp2MaxDepth > bp2CutoffDepth);
+    const bool isOverDepth(isBp1OverDepth || isBp2OverDepth);
+    // the over-depth flags only have an effect when max-depth filtering is active:
+    const bool isSkipEvidenceSearch(isMaxDepth && isOverDepth);
+
+    if (! isSkipEvidenceSearch)
+    {
+        // count the paired-read fragments supporting the ref and alt alleles in each sample:
+        //
+        getSVPairSupport(svData, assemblyData, sv, svId, evidence, svSupports);
+
+        // count the split reads supporting the ref and alt alleles in each sample
+        //
+        getSVSplitReadSupport(assemblyData, sv, svId, baseInfo, evidence, svSupports);
+
+        // fix erroneous pair support based on split evidence:
+        resolvePairSplitConflicts(sv, evidence);
+    }
+
+    // compute allele likelihoods, and any other summary metric shared between all models:
+    //
+    getSVSupportSummary(evidence, baseInfo);
+}
+
+
+
+/// bundle a probability with its complement and the natural log of each
+///
+struct ProbSet
+{
+    /// \param initProb probability from which all companion values are derived
+    explicit
+    ProbSet(const double initProb)
+        : prob(initProb)
+        , comp(1-initProb)
+        , lnProb(std::log(initProb))
+        , lnComp(std::log(1-initProb))
+    {}
+
+    double prob;   ///< the probability itself
+    double comp;   ///< complement, 1-prob
+    double lnProb; ///< natural log of prob
+    double lnComp; ///< natural log of comp
+};
+
+
+
+/// add one fragment's spanning-pair contribution for a single allele to a running log-likelihood
+///
+/// The allele's fragment likelihood is mixed with the chimera probabilities, and the log of that mixture is
+/// scaled by \p power before being added to \p bpLnLhood.
+///
+/// \param selfChimeraProb chimera probability set of the allele being scored (its complement is used)
+/// \param otherChimeraProb chimera probability set of the opposing allele (its probability is used)
+/// \param allele fragment evidence for the allele being scored
+/// \param power multiplier applied to the log-likelihood increment
+/// \param bpLnLhood running log-likelihood, incremented in place
+static
+void
+incrementSpanningPairAlleleLnLhood(
+    const ProbSet& selfChimeraProb,
+    const ProbSet& otherChimeraProb,
+    const SVFragmentEvidenceAllele& allele,
+    const double power,
+    double& bpLnLhood)
+{
+    const float alleleFragProb(getSpanningPairAlleleLhood(allele));
+    const double mixedLhood(selfChimeraProb.comp*alleleFragProb + otherChimeraProb.prob);
+    bpLnLhood += std::log(mixedLhood)*power;
+}
+
+
+
+/// compute the split-read log-likelihood of one read for a single allele
+///
+/// The alignment log-likelihood is taken from breakend 1 when that is the only supported breakend, from
+/// whichever breakend has the higher value when both are supported (ties go to breakend 1), and from
+/// breakend 2 otherwise. It is then combined with the mapping probabilities via log_sum.
+///
+/// \param selfMapProb mapping probability set of the allele being scored (its ln complement is used)
+/// \param otherMapProb mapping probability set of the opposing allele (its ln probability is used)
+/// \param allele fragment evidence for the allele being scored
+/// \param isSupported (breakend 1, breakend 2) split support flags
+/// \param isRead1 select read1 (true) or read2 (false) of the fragment
+/// \param isReadEvaluated set to false when the read was not split-evaluated at both breakends, else untouched
+/// \return the log-likelihood increment for this read and allele
+static
+double
+incrementAlleleSplitReadLhood(
+    const ProbSet& selfMapProb,
+    const ProbSet& otherMapProb,
+    const SVFragmentEvidenceAllele& allele,
+    const double /*readLnPrior*/,
+    const std::pair<bool,bool>& isSupported,
+    const bool isRead1,
+    bool& isReadEvaluated)
+{
+    const auto& bp1Read(allele.bp1.getRead(isRead1));
+    const auto& bp2Read(allele.bp2.getRead(isRead1));
+
+    const bool isSplitEvaluatedAtBothBreakends(bp1Read.isSplitEvaluated && bp2Read.isSplitEvaluated);
+    if (! isSplitEvaluatedAtBothBreakends)
+    {
+        isReadEvaluated = false;
+    }
+
+    const double alignBp1LnLhood(bp1Read.splitLnLhood);
+    const double alignBp2LnLhood(bp2Read.splitLnLhood);
+
+    // prefer bp1 when it is the only supported breakend, or when both are supported and bp1 is at least as good:
+    const bool isUseBp1Lhood(isSupported.first &&
+                             ((! isSupported.second) || (alignBp1LnLhood >= alignBp2LnLhood)));
+
+    double alignLnLhood(alignBp2LnLhood);
+    if (isUseBp1Lhood)
+    {
+        alignLnLhood = alignBp1LnLhood;
+    }
+
+    const double fragLnLhood = log_sum((selfMapProb.lnComp+alignLnLhood), (otherMapProb.lnProb)); //+readLnPrior));
+
+#ifdef DEBUG_SCORE
+    static const std::string logtag("incrementAlleleSplitReadLhood: ");
+    log_os << logtag //<< "readPrior: " << readLnPrior
+           << " isRead1?: " << isRead1 << "\n";
+    log_os << logtag << "isEval " << isReadEvaluated << "\n";
+    log_os << logtag << "alignBp1LnLhood " << alignBp1LnLhood << "\n";
+    log_os << logtag << "alignBp2LnLhood " << alignBp2LnLhood << "\n";
+    log_os << logtag << "selfMap " << selfMapProb.lnProb << "\n";
+    log_os << logtag << "otherMap " << otherMapProb.lnProb << "\n";
+    log_os << logtag << "increment " << fragLnLhood << "\n";
+#endif
+
+    return fragLnLhood;
+}
+
+
+
+/// add the split-read log-likelihood of one read to the ref and alt running totals
+///
+/// If the read has no split support at either breakend (tier2 support is consulted in permissive mode),
+/// \p isReadEvaluated is cleared and the totals are left unchanged.
+///
+/// \param fragev evidence for the fragment holding the read
+/// \param refMapProb,altMapProb mapping probability sets for the ref and alt alleles
+/// \param isPermissive select the tier2 split support test instead of the default one
+/// \param isRead1 select read1 (true) or read2 (false) of the fragment
+/// \param refSplitLnLhood,altSplitLnLhood running log-likelihoods, incremented in place
+/// \param isReadEvaluated cleared when the read cannot be used; never set to true here
+static
+void
+incrementSplitReadLhood(
+    const std::string& /*fragLabel*/,
+    const SVFragmentEvidence& fragev,
+    const ProbSet& refMapProb,
+    const ProbSet& altMapProb,
+    const bool isPermissive,
+    const bool isRead1,
+    double& refSplitLnLhood,
+    double& altSplitLnLhood,
+    bool& isReadEvaluated)
+{
+    static const double baseLnPrior(std::log(0.25));
+
+#ifdef DEBUG_SCORE
+    log_os << __FUNCTION__ << ": pre-support\n";
+#endif
+
+    const std::pair<bool,bool> isSupported(
+        isPermissive ?
+        fragev.isAnyTier2SplitReadSupport(isRead1) :
+        fragev.isAnySplitReadSupport(isRead1));
+
+    const bool isAnyBreakendSupported(isSupported.first || isSupported.second);
+    if (! isAnyBreakendSupported)
+    {
+        isReadEvaluated = false;
+        return;
+    }
+
+#ifdef DEBUG_SCORE
+    log_os << __FUNCTION__ << ": post-support\n";
+#endif
+
+    // per-read prior in log space: the per-base prior scaled by the read length
+    // (currently passed on but not used by the per-allele routine)
+    const unsigned readLength(fragev.getRead(isRead1).size);
+    const double readLnPrior(baseLnPrior*readLength);
+
+#ifdef DEBUG_SCORE
+    log_os << __FUNCTION__ << ": starting ref\n";
+#endif
+    const double refSplit = incrementAlleleSplitReadLhood(refMapProb, altMapProb, fragev.ref, readLnPrior, isSupported, isRead1, isReadEvaluated);
+#ifdef DEBUG_SCORE
+    log_os << __FUNCTION__ << ": starting alt\n";
+#endif
+    const double altSplit = incrementAlleleSplitReadLhood(altMapProb, refMapProb, fragev.alt, readLnPrior, isSupported, isRead1, isReadEvaluated);
+
+    // (disabled) filter out split read evidence with a poor alignment to both alleles:
+    /// TODO: fraction of these noise reads could be informative for filtration
+    // if ((refSplit < (altMapProb.lnProb+1)) && (altSplit < (refMapProb.lnProb+1))) return;
+
+    altSplitLnLhood += altSplit;
+    refSplitLnLhood += refSplit;
+}
+
+
+
+/// per-allele log-likelihood components accumulated for a single fragment
+struct AlleleLnLhood
+{
+    double fragPair{0.};   ///< spanning pair component
+    double read1Split{0.}; ///< read1 split-read component
+    double read2Split{0.}; ///< read2 split-read component
+};
+
+
+
+/// combine the pair and split-read components of one allele into a single fragment log-likelihood
+///
+/// Split read evidence is limited to a single read, because it is only possible for one section of the
+/// molecule to independently cross the breakend: when both reads were evaluated the larger of the two split
+/// values is used.
+///
+/// \param al log-likelihood components for the allele
+/// \param isRead1Evaluated,isRead2Evaluated whether the split value of each read may be used
+/// \return the pair component plus at most one split-read component
+static
+double
+getFragLnLhood(
+    const AlleleLnLhood& al,
+    const bool isRead1Evaluated,
+    const bool isRead2Evaluated)
+{
+#ifdef DEBUG_SCORE
+    log_os << "getFragLnLhood: frag/read1/read2 " << al.fragPair << " " << al.read1Split << " " << al.read2Split << "\n";
+    log_os << "getFragLnLhood: isread1/isread2 " << isRead1Evaluated << " " << isRead2Evaluated << "\n";
+#endif
+
+    if (isRead1Evaluated && isRead2Evaluated)
+    {
+        return al.fragPair + std::max(al.read1Split, al.read2Split);
+    }
+    if (isRead1Evaluated)
+    {
+        return al.fragPair + al.read1Split;
+    }
+    if (isRead2Evaluated)
+    {
+        return al.fragPair + al.read2Split;
+    }
+
+    // neither read contributes split evidence:
+    return al.fragPair;
+}
+
+
+
+/// weight given to spanning pair evidence for this candidate
+///
+/// when an sv is treated as 'small', we skip all paired-read evidence and rely on split reads only; with
+/// further model improvements we can add pairs back into the small variant calls.
+///
+/// this function returns 1 for a variant which is "fully large" and 0 for a variant which is "fully small",
+/// with intermediate values for sizes in between. Anything other than an insertion or deletion always gets 1.
+///
+static
+float
+getSpanningPairWeight(
+    const SVCandidate& sv)
+{
+    const auto svType(getExtendedSVType(sv));
+    const bool isInsert(svType == EXTENDED_SV_TYPE::INSERT);
+    const bool isDelete(svType == EXTENDED_SV_TYPE::DELETE);
+
+    if (! (isInsert || isDelete))
+    {
+        return 1.f;
+    }
+
+    if (isInsert && SVScorePairAltProcessor::isLargeInsertSV(sv))
+    {
+        // large insertions are ramped on the length of the inserted sequence:
+        static const int minInsertSmallSize(100);
+        static const int maxInsertSmallSize(150);
+        static const LinearScaler<int> insertSizeRamp(minInsertSmallSize, maxInsertSmallSize);
+
+        return insertSizeRamp.getScale(sv.insertSeq.size());
+    }
+
+    // everything else is ramped on the candidate's center size:
+    /// TODO set these numbers from insert size:
+    static const int minSmallSize(300);
+    static const int maxSmallSize(500);
+    static const LinearScaler<int> svSizeRamp(minSmallSize, maxSmallSize);
+
+    return svSizeRamp.getScale(sv.centerSize());
+}
+
+
+
+/// size-dependent weight for the large-SV noise prior
+///
+/// Candidates whose two breakends lie on different tids always get 1; otherwise the weight is ramped on the
+/// candidate's center size between 5000 and 10000.
+static
+float
+largeNoiseSVPriorWeight(
+    const SVCandidate& sv)
+{
+    static const int smallSize(5000);
+    static const int largeSize(10000);
+    static const LinearScaler<int> svSizeRamp(smallSize, largeSize);
+
+    const bool isSameTid(sv.bp1.interval.tid == sv.bp2.interval.tid);
+    if (isSameTid)
+    {
+        return svSizeRamp.getScale(sv.centerSize());
+    }
+
+    return 1.f;
+}
+
+
+
+/// accumulate the ref and alt log-likelihood components (spanning pair and per-read split) of one fragment
+///
+/// return true if any evidence exists for fragment:
+///
+/// \param spanningPairWeight base power applied (in log space) to the spanning pair likelihoods
+/// \param semiMappedPower multiply out semi-mapped reads (in log space) by this value
+/// \param refChimeraProb,altChimeraProb chimera probability sets used for the spanning pair likelihoods
+/// \param refSplitMapProb,altSplitMapProb mapping probability sets used for the split read likelihoods
+/// \param isPermissive use the permissive anchoring and tier2 split support tests
+/// \param fragLabel fragment name, used for debug logging only
+/// \param fragev evidence gathered for the fragment
+/// \param refLnLhoodSet,altLnLhoodSet per-allele log-likelihood components, incremented in place
+/// \param isRead1Evaluated,isRead2Evaluated set to true on entry to the split step, then cleared by the
+///        split-read routine for any read that cannot be used
+///
+static
+bool
+getRefAltFromFrag(
+    const float spanningPairWeight,
+    const double semiMappedPower,
+    const ProbSet& refChimeraProb,
+    const ProbSet& altChimeraProb,
+    const ProbSet& refSplitMapProb,
+    const ProbSet& altSplitMapProb,
+    const bool isPermissive,
+    const std::string& fragLabel,
+    const SVFragmentEvidence& fragev,
+    AlleleLnLhood& refLnLhoodSet,
+    AlleleLnLhood& altLnLhoodSet,
+    bool& isRead1Evaluated,
+    bool& isRead2Evaluated)
+{
+#ifdef DEBUG_SCORE
+    log_os << __FUNCTION__ << ": qname: " << fragLabel << " fragev: " << fragev << "\n";
+#endif
+
+    /// TODO: add read pairs with one shadow read to the alt read pool
+
+    bool isFragEvaluated(false);
+
+    // high-quality spanning support relies on read1 and read2 mapping well:
+    bool isPairUsable;
+    if (isPermissive)
+    {
+        isPairUsable = (fragev.read1.isObservedAnchor(isPermissive) || fragev.read2.isObservedAnchor(isPermissive));
+    }
+    else
+    {
+        isPairUsable = ((fragev.read1.isScanned && fragev.read2.isScanned) &&
+                        (fragev.read1.isAnchored(isPermissive) || fragev.read2.isAnchored(isPermissive)));
+    }
+
+    if (isPairUsable)
+    {
+        /// only add to the likelihood if the fragment supports at least one allele:
+        if ( fragev.isAnySpanningPairSupport() )
+        {
+            // reduce the impact of spanning reads to zero as svs become small, this is because of complex signal/noise
+            // which the scoring models haven't (yet) been designed to handle.
+            const bool isSemiMapped(! (fragev.read1.isAnchored(isPermissive) && fragev.read2.isAnchored(isPermissive)));
+            double spanPower(spanningPairWeight);
+
+            if (isSemiMapped)
+            {
+                // only count semi-mapped reads for the alt allele
+                if (getSpanningPairAlleleLhood(fragev.alt) > getSpanningPairAlleleLhood(fragev.ref))
+                {
+                    spanPower *= semiMappedPower;
+                }
+                else
+                {
+                    // a zero power makes both increments below exactly zero:
+                    spanPower = 0.;
+                }
+            }
+
+            incrementSpanningPairAlleleLnLhood(refChimeraProb, altChimeraProb, fragev.ref, spanPower, refLnLhoodSet.fragPair);
+            incrementSpanningPairAlleleLnLhood(altChimeraProb, refChimeraProb, fragev.alt, spanPower, altLnLhoodSet.fragPair);
+            isFragEvaluated=true;
+        }
+    }
+
+    /// split support is less dependent on mapping quality of the individual read, because
+    /// we're potentially relying on shadow reads recovered from the unmapped state
+    isRead1Evaluated = true;
+    isRead2Evaluated = true;
+#ifdef DEBUG_SCORE
+    log_os << __FUNCTION__ << ": starting read1 split\n";
+#endif
+    incrementSplitReadLhood(fragLabel, fragev, refSplitMapProb, altSplitMapProb, isPermissive, true,  refLnLhoodSet.read1Split, altLnLhoodSet.read1Split, isRead1Evaluated);
+#ifdef DEBUG_SCORE
+    log_os << __FUNCTION__ << ": starting read2 split\n";
+#endif
+    incrementSplitReadLhood(fragLabel, fragev, refSplitMapProb, altSplitMapProb, isPermissive, false, refLnLhoodSet.read2Split, altLnLhoodSet.read2Split, isRead2Evaluated);
+
+#ifdef DEBUG_SCORE
+    // report the read2 flag in the third slot (it previously repeated the read1 flag):
+    log_os << __FUNCTION__ << ": iseval frag/read1/read2: " << isFragEvaluated << " " << isRead1Evaluated << " " << isRead2Evaluated << "\n";
+#endif
+    return (isFragEvaluated || isRead1Evaluated || isRead2Evaluated);
+}
+
+
+
+/// add the diploid genotype log-likelihood contribution of every fragment in one sample's evidence track
+///
+/// \param spanningPairWeight power applied (in log space) to spanning pair evidence
+/// \param sampleEvidence per-fragment evidence for a single sample
+/// \param loglhood per-genotype log-likelihoods, incremented in place
+static
+void
+addDiploidLoglhood(
+    const float spanningPairWeight,
+    const SVEvidence::evidenceTrack_t& sampleEvidence,
+    std::array<double,DIPLOID_GT::SIZE>& loglhood)
+{
+    /// TODO: set this from graph data:
+    ///
+    /// put some more thought into this -- is this P (spurious | any old read) or P( spurious | chimera ) ??
+    /// it seems like it should be the latter in the usages that really matter.
+    ///
+    static const ProbSet chimeraProb(1e-3);
+
+    /// use a constant mapping prob for now just to get the zero-th order concept into the model
+    /// that "reads are mismapped at a non-trivial rate"
+    /// TODO: experiment with per-read mapq values
+    ///
+    static const ProbSet refSplitMapProb(1e-6);
+    static const ProbSet altSplitMapProb(1e-5);
+
+    /// don't use semi-mapped reads for germline calling:
+    static const double semiMappedPower(0.);
+
+    static const bool isPermissive(false);
+
+    for (const auto& fragEntry : sampleEvidence)
+    {
+        const std::string& fragLabel(fragEntry.first);
+        const SVFragmentEvidence& fragev(fragEntry.second);
+
+        AlleleLnLhood refLnLhoodSet;
+        AlleleLnLhood altLnLhoodSet;
+        bool isRead1Evaluated(true);
+        bool isRead2Evaluated(true);
+
+        const bool isFragEvidence(
+            getRefAltFromFrag(spanningPairWeight, semiMappedPower, chimeraProb, chimeraProb,
+                              refSplitMapProb, altSplitMapProb, isPermissive, fragLabel, fragev,
+                              refLnLhoodSet, altLnLhoodSet, isRead1Evaluated, isRead2Evaluated));
+
+        // skip fragments which were not evaluated for pair or split support for either allele:
+        if (! isFragEvidence) continue;
+
+        for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+        {
+            using namespace DIPLOID_GT;
+
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": starting gt: " << gt << " " << label(gt) << "\n";
+#endif
+
+            const index_t gtid(static_cast<index_t>(gt));
+            const double refLnFragLhood(getFragLnLhood(refLnLhoodSet, isRead1Evaluated, isRead2Evaluated));
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": refLnFragLhood: " << refLnFragLhood << "\n";
+#endif
+            const double altLnFragLhood(getFragLnLhood(altLnLhoodSet, isRead1Evaluated, isRead2Evaluated));
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": altLnFragLhood: " << altLnFragLhood << "\n";
+#endif
+            // weight each allele's fragment likelihood by the allele fraction terms of this genotype:
+            const double refLnLhood(refLnFragLhood + altLnCompFraction(gtid));
+            const double altLnLhood(altLnFragLhood + altLnFraction(gtid));
+            loglhood[gt] += log_sum(refLnLhood, altLnLhood);
+
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": gt/fragref/ref/fragalt/alt/loglhood: "
+                   << label(gt)
+                   << " " << refLnFragLhood
+                   << " " << refLnLhood
+                   << " " << altLnFragLhood
+                   << " " << altLnLhood
+                   << " " << loglhood[gt]
+                   << "\n";
+#endif
+        }
+    }
+}
+
+
+
+/// score diploid germline specific components:
+///
+/// For every sample, the genotype log-likelihoods are summed over all junctions, combined with the genotype
+/// log-priors, and used to set the sample's genotype, genotype score and phred-scaled likelihoods. The
+/// record-level alt score and the record/sample filters are then derived.
+///
+/// \param diploidOpt filter thresholds and filter labels
+/// \param readScanner source of the fragment size range used by the zero pair filter
+/// \param diploidDopt derived options, supplying the genotype log-priors
+/// \param dFilter depth filter used for the high depth filter
+/// \param junctionData evidence and base scoring info for each junction; must not be empty
+/// \param diploidInfo receives scores and filters
+static
+void
+scoreDiploidSV(
+    const CallOptionsDiploid& diploidOpt,
+    const SVLocusScanner& readScanner,
+    const CallOptionsDiploidDeriv& diploidDopt,
+    const ChromDepthFilterUtil& dFilter,
+    const std::vector<JunctionCallInfo>& junctionData,
+    SVScoreInfoDiploid& diploidInfo)
+{
+    //
+    // compute qualities
+    //
+    static const int maxQ(999);
+
+    assert(! junctionData.empty());
+
+    // product over samples of each sample's value for the REF genotype in pprob:
+    double jointRefProb(1.);
+
+    const unsigned diploidSampleCount(diploidInfo.samples.size());
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        SVScoreInfoDiploidSample& diploidSampleInfo(diploidInfo.samples[diploidSampleIndex]);
+
+        // genotype log-likelihoods for this sample, summed over all junctions:
+        std::array<double,DIPLOID_GT::SIZE> loglhood;
+        std::fill(loglhood.begin(),loglhood.end(),0);
+        for (const JunctionCallInfo& junction : junctionData)
+        {
+            const SVEvidence::evidenceTrack_t& etrack(junction.getEvidence().samples[diploidSampleIndex]);
+            addDiploidLoglhood(junction.getSpanningWeight(), etrack, loglhood);
+        }
+        std::array<double,DIPLOID_GT::SIZE> pprob;
+        for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+        {
+            pprob[gt] = loglhood[gt] + diploidDopt.logPrior[gt];
+        }
+
+        // presumably normalizes pprob in place from ln-scale to a probability distribution and reports the
+        // index of the most probable genotype in maxGt -- TODO confirm against normalize_ln_distro
+        unsigned maxGt(0);
+        normalize_ln_distro(pprob.begin(), pprob.end(), maxGt);
+
+#ifdef DEBUG_SCORE
+        for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+        {
+            log_os << __FUNCTION__ << ": gt/lhood/prior/pprob: "
+                   << DIPLOID_GT::label(gt)
+                   << " " << loglhood[gt]
+                   << " " << diploidDopt.prior[gt]
+                   << " " << pprob[gt]
+                   << "\n";
+        }
+#endif
+
+        diploidSampleInfo.gt=static_cast<DIPLOID_GT::index_t>(maxGt);
+        diploidSampleInfo.gtScore=std::min(maxQ,error_prob_to_qphred(prob_comp(pprob.begin(),pprob.end(), diploidSampleInfo.gt)));
+
+        // set phredLoghood:
+        {
+            // likelihoods are reported relative to the most likely genotype:
+            unsigned maxIndex(0);
+            for (unsigned gt(1); gt<DIPLOID_GT::SIZE; ++gt)
+            {
+                if (loglhood[gt] > loglhood[maxIndex]) maxIndex = gt;
+            }
+            for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+            {
+                diploidSampleInfo.phredLoghood[gt] = std::min(maxQ,ln_error_prob_to_qphred(loglhood[gt]-loglhood[maxIndex]));
+            }
+        }
+
+        jointRefProb *= pprob[DIPLOID_GT::REF];
+    }
+    diploidInfo.altScore=std::min(maxQ,error_prob_to_qphred(jointRefProb));
+
+
+    //
+    // apply filters
+    //
+    {
+        if (diploidInfo.altScore < diploidOpt.minPassAltScore)
+        {
+            diploidInfo.filters.insert(diploidOpt.minAltFilterLabel);
+        }
+
+        // add sample specific filters
+        bool isAllMinGTFiltered(true);
+        for (unsigned sampleIndex(0); sampleIndex<diploidSampleCount; ++sampleIndex)
+        {
+            SVScoreInfoDiploidSample& diploidSampleInfo(diploidInfo.samples[sampleIndex]);
+            if (diploidSampleInfo.gtScore < diploidOpt.minPassGTScore)
+            {
+                diploidSampleInfo.filters.insert(diploidOpt.minGTFilterLabel);
+            }
+            else
+            {
+                isAllMinGTFiltered=false;
+            }
+        }
+
+        // apply sample-specific filter to whole record when all samples are impacted
+        if ((diploidSampleCount>0) && isAllMinGTFiltered)
+        {
+            diploidInfo.filters.insert(diploidOpt.minGTFilterLabel);
+        }
+
+        // each junction-level filter below is applied to the record when more than half of the junctions fail it
+        const unsigned junctionCount(junctionData.size());
+
+        // apply high depth filter:
+        if (dFilter.isMaxDepthFilter())
+        {
+            unsigned filteredJunctionCount(0);
+            for (const JunctionCallInfo& junction : junctionData)
+            {
+                const SVScoreInfo& baseInfo(junction.getBaseInfo());
+                const SVCandidate& sv(junction.getSV());
+
+                // apply maxdepth filter if either of the breakpoints exceeds the maximum depth:
+                if ((baseInfo.bp1MaxDepth > dFilter.maxDepth(sv.bp1.interval.tid)) ||
+                    (baseInfo.bp2MaxDepth > dFilter.maxDepth(sv.bp2.interval.tid)))
+                {
+                    filteredJunctionCount++;
+                }
+            }
+
+            if ((filteredJunctionCount*2) > junctionCount)
+            {
+                diploidInfo.filters.insert(diploidOpt.maxDepthFilterLabel);
+            }
+        }
+
+        // apply MQ0 filter
+        {
+            unsigned filteredJunctionCount(0);
+            for (const JunctionCallInfo& junction : junctionData)
+            {
+                const SVScoreInfo& baseInfo(junction.getBaseInfo());
+                const SVCandidate& sv(junction.getSV());
+
+                // the MQ0 filter is only considered for junctions where isSVBelowMinSize(sv,1000) holds:
+                const bool isMQ0FilterSize(isSVBelowMinSize(sv,1000));
+                if (isMQ0FilterSize)
+                {
+                    // apply MQ0 filter for one junction if either breakend meets the filter criteria:
+                    if ((baseInfo.bp1MQ0Frac > diploidOpt.maxMQ0Frac) ||
+                        (baseInfo.bp2MQ0Frac > diploidOpt.maxMQ0Frac))
+                    {
+                        filteredJunctionCount++;
+                    }
+                }
+            }
+
+            if ((filteredJunctionCount*2) > junctionCount)
+            {
+                diploidInfo.filters.insert(diploidOpt.maxMQ0FracLabel);
+            }
+        }
+
+        // apply zero pair filter
+        {
+            // this size represents the outer edge of variant size above which we expect pair
+            // discovery to suffer no dropouts due to normal pair distro sizes
+            static const double insertSizeFactor(1);
+            const unsigned maxClosePairSize(readScanner.getExtremeFifthRange().max * insertSizeFactor);
+
+            unsigned filteredJunctionCount(0);
+            for (const JunctionCallInfo& junction : junctionData)
+            {
+                const SVScoreInfo& baseInfo(junction.getBaseInfo());
+                const SVCandidate& sv(junction.getSV());
+
+                // only apply the zero pair filter to variants that should definitely have located supporting pairs:
+                const EXTENDED_SV_TYPE::index_t svType(getExtendedSVType(sv));
+                const bool isZeroPairFilterSize((svType != EXTENDED_SV_TYPE::INSERT) && (! isSVBelowMinSize(sv, maxClosePairSize)));
+
+                if (isZeroPairFilterSize)
+                {
+                    // a junction fails when no sample has any confident alt spanning pair:
+                    unsigned totalDiploidSpanningPairCount(0);
+                    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+                    {
+                        totalDiploidSpanningPairCount += baseInfo.samples[diploidSampleIndex].alt.confidentSpanningPairCount;
+                    }
+
+                    if (totalDiploidSpanningPairCount == 0)
+                    {
+                        filteredJunctionCount++;
+                    }
+                }
+            }
+
+            if ((filteredJunctionCount*2) > junctionCount)
+            {
+                diploidInfo.filters.insert(diploidOpt.noPairSupportLabel);
+            }
+        }
+    }
+}
+
+
+/// score diploid tumor specific components (under tumor-only mode):
+///
+/// No quality score is computed yet (see TODOs); only the high depth and MQ0 filters are applied, each when
+/// more than half of the junctions fail the respective test.
+///
+/// \param tumorOpt filter thresholds and filter labels
+/// \param dFilter depth filter used for the high depth filter
+/// \param junctionData base scoring info for each junction; must not be empty
+/// \param tumorInfo receives the filters
+static
+void
+scoreTumorSV(
+    const CallOptionsTumor& tumorOpt,
+    const ChromDepthFilterUtil& dFilter,
+    const std::vector<JunctionCallInfo>& junctionData,
+    SVScoreInfoTumor& tumorInfo)
+{
+    //
+    // compute qualities
+    //
+    assert(! junctionData.empty());
+
+    //TODO: scoring tumor-only variants
+
+    //
+    // apply filters
+    //
+    {
+        //TODO: add score filter
+
+        const unsigned junctionCount(junctionData.size());
+
+        // apply high depth filter:
+        if (dFilter.isMaxDepthFilter())
+        {
+            unsigned filteredJunctionCount(0);
+            for (const JunctionCallInfo& junction : junctionData)
+            {
+                const SVScoreInfo& baseInfo(junction.getBaseInfo());
+                const SVCandidate& sv(junction.getSV());
+
+                // apply maxdepth filter if either of the breakpoints exceeds the maximum depth:
+                if ((baseInfo.bp1MaxDepth > dFilter.maxDepth(sv.bp1.interval.tid)) ||
+                    (baseInfo.bp2MaxDepth > dFilter.maxDepth(sv.bp2.interval.tid)))
+                {
+                    filteredJunctionCount++;
+                }
+            }
+
+            if ((filteredJunctionCount*2) > junctionCount)
+            {
+                tumorInfo.filters.insert(tumorOpt.maxDepthFilterLabel);
+            }
+        }
+
+        // apply MQ0 filter
+        {
+            unsigned filteredJunctionCount(0);
+            for (const JunctionCallInfo& junction : junctionData)
+            {
+                const SVScoreInfo& baseInfo(junction.getBaseInfo());
+                const SVCandidate& sv(junction.getSV());
+
+
+                // the MQ0 filter is only considered for junctions where isSVBelowMinSize(sv,1000) holds:
+                const bool isMQ0FilterSize(isSVBelowMinSize(sv,1000));
+                if (isMQ0FilterSize)
+                {
+                    // apply MQ0 filter for one junction if either breakend meets the filter criteria:
+                    if ((baseInfo.bp1MQ0Frac > tumorOpt.maxMQ0Frac) ||
+                        (baseInfo.bp2MQ0Frac > tumorOpt.maxMQ0Frac))
+                    {
+                        filteredJunctionCount++;
+                    }
+                }
+            }
+
+            if ((filteredJunctionCount*2) > junctionCount)
+            {
+                tumorInfo.filters.insert(tumorOpt.maxMQ0FracLabel);
+            }
+        }
+    }
+}
+
+
+/// placeholder RNA scoring model
+///
+/// todo This is mostly a placeholder. Add real RNA scoring model
+///
+/// Assigns fixed scores (gtScore 0 for the first sample, altScore 42) and applies the RNA filter when the
+/// alt allele of the first sample has no split reads or no confident spanning pairs.
+///
+/// \param diploidOpt supplies the RNA filter label
+/// \param baseInfo per-sample allele support counts
+/// \param diploidInfo receives the scores and the filter
+static
+void
+scoreRNASV(
+    const CallOptionsDiploid& diploidOpt,
+    SVScoreInfo& baseInfo,
+    SVScoreInfoDiploid& diploidInfo)
+{
+#ifdef DEBUG_SCORE
+    //log_os << __FUNCTION__ << "Scoring RNA candidate " << sv << "\n";
+#endif
+
+    /// TODO TMP add real sampleIndex
+    const unsigned sampleIndex(0);
+
+    diploidInfo.samples[sampleIndex].gtScore=0;
+    diploidInfo.altScore=42;
+    if (baseInfo.samples[sampleIndex].alt.splitReadCount == 0)
+    {
+        diploidInfo.filters.insert(diploidOpt.rnaFilterLabel);
+#ifdef DEBUG_SCORE
+        // message now matches the condition tested (it was swapped with the one below):
+        log_os << __FUNCTION__ << ": Failed. No split read " << "\n";
+#endif
+    }
+    if (baseInfo.samples[sampleIndex].alt.confidentSpanningPairCount == 0)
+    {
+        diploidInfo.filters.insert(diploidOpt.rnaFilterLabel);
+#ifdef DEBUG_SCORE
+        log_os << __FUNCTION__ << ": Failed. No spanning pair " << "\n";
+#endif
+    }
+}
+
+/// weighted spanning pair count for one allele
+///
+/// In permissive mode the semi-mapped confident count is used, otherwise the confident count. The weighted
+/// value is converted to unsigned on return.
+static
+unsigned
+getSpanningPairCount(
+    const SVSampleAlleleInfo& allele,
+    const float spanningPairWeight,
+    const bool isPermissive)
+{
+    return (isPermissive ?
+            spanningPairWeight*allele.confidentSemiMappedSpanningPairCount :
+            spanningPairWeight*allele.confidentSpanningPairCount);
+}
+
+
+
+/// total support count for one allele: confident split reads plus weighted spanning pairs
+static
+unsigned
+getSupportCount(
+    const SVSampleAlleleInfo& allele,
+    const float spanningPairWeight,
+    const bool isPermissive)
+{
+    const unsigned weightedPairCount(getSpanningPairCount(allele, spanningPairWeight, isPermissive));
+    return allele.confidentSplitReadCount + weightedPairCount;
+}
+
+
+
+#if 0
+// disabled single-junction variant, kept for reference only: it reads baseInfo.tumor directly, while the
+// active overload pools counts over all junctions via baseInfo.samples[tumorSampleIndex]
+static
+double
+estimateSomaticMutationFreq(
+    const SVScoreInfo& baseInfo,
+    const float spanningPairWeight,
+    const bool /*isPermissive*/)
+{
+    static const bool isPermissive(false);
+    const unsigned altCounts = getSupportCount(baseInfo.tumor.alt, spanningPairWeight, isPermissive);
+    const unsigned refCounts = getSupportCount(baseInfo.tumor.ref, spanningPairWeight, isPermissive);
+    if ((altCounts + refCounts) == 0) return 0;
+    return static_cast<double>(altCounts) / static_cast<double>(altCounts + refCounts);
+}
+#endif
+
+
+
+/// estimate the alt allele frequency in the tumor sample, pooled over all junctions
+///
+/// Support counts are always gathered in non-permissive mode; the unnamed bool argument is ignored.
+///
+/// \param tumorSampleIndex index of the tumor sample within each junction's sample list
+/// \param junctionData base scoring info and spanning weight for each junction
+/// \return alt/(alt+ref) support ratio, or 0 when neither allele has any support
+static
+double
+estimateSomaticMutationFreq(
+    const unsigned tumorSampleIndex,
+    const std::vector<JunctionCallInfo>& junctionData,
+    const bool /*isPermissive*/)
+{
+    static const bool isPermissive(false);
+
+    unsigned altTotal(0);
+    unsigned refTotal(0);
+    for (const auto& junction : junctionData)
+    {
+        const SVScoreInfo& baseInfo(junction.getBaseInfo());
+        const float pairWeight(junction.getSpanningWeight());
+        const SVSampleInfo& tumorInfo(baseInfo.samples[tumorSampleIndex]);
+
+        altTotal += getSupportCount(tumorInfo.alt, pairWeight, isPermissive);
+        refTotal += getSupportCount(tumorInfo.ref, pairWeight, isPermissive);
+    }
+
+    const unsigned allTotal(altTotal + refTotal);
+    if (allTotal == 0)
+    {
+        return 0;
+    }
+    return static_cast<double>(altTotal) / static_cast<double>(allTotal);
+}
+
+
+
+#if 0
+// disabled single-junction variant, kept for reference only: it reads baseInfo.normal/baseInfo.tumor
+// directly, while the active overload pools counts over all junctions via baseInfo.samples[...]
+static
+double
+estimateNoiseMutationFreq(
+    const SVScoreInfo& baseInfo,
+    const float spanningPairWeight,
+    const bool /*isPermissive*/)
+{
+    static const bool isPermissive(false);
+    const unsigned normalAltCounts = getSupportCount(baseInfo.normal.alt, spanningPairWeight, isPermissive);
+    const unsigned normalRefCounts = getSupportCount(baseInfo.normal.ref, spanningPairWeight, isPermissive);
+    const unsigned tumorAltCounts = getSupportCount(baseInfo.tumor.alt, spanningPairWeight, isPermissive);
+    const unsigned tumorRefCounts = getSupportCount(baseInfo.tumor.ref, spanningPairWeight, isPermissive);
+
+    const unsigned altCounts(normalAltCounts + tumorAltCounts);
+    const unsigned refCounts(normalRefCounts + tumorRefCounts);
+
+    if ((altCounts + refCounts) == 0) return 0;
+    return static_cast<double>(altCounts) / static_cast<double>(altCounts + refCounts);
+}
+#endif
+
+
+
+/// estimate the alt allele frequency with normal and tumor samples combined, pooled over all junctions
+///
+/// Support counts are always gathered in non-permissive mode; the unnamed bool argument is ignored.
+///
+/// \param normalSampleIndex,tumorSampleIndex indices of the two samples within each junction's sample list
+/// \param junctionData base scoring info and spanning weight for each junction
+/// \return alt/(alt+ref) support ratio, or 0 when neither allele has any support
+static
+double
+estimateNoiseMutationFreq(
+    const unsigned normalSampleIndex,
+    const unsigned tumorSampleIndex,
+    const std::vector<JunctionCallInfo>& junctionData,
+    const bool /*isPermissive*/)
+{
+    static const bool isPermissive(false);
+
+    unsigned altTotal(0);
+    unsigned refTotal(0);
+    for (const auto& junction : junctionData)
+    {
+        const SVScoreInfo& baseInfo(junction.getBaseInfo());
+        const float pairWeight(junction.getSpanningWeight());
+
+        const SVSampleInfo& normalInfo(baseInfo.samples[normalSampleIndex]);
+        const SVSampleInfo& tumorInfo(baseInfo.samples[tumorSampleIndex]);
+
+        const unsigned junctionAlt(getSupportCount(normalInfo.alt, pairWeight, isPermissive) +
+                                   getSupportCount(tumorInfo.alt, pairWeight, isPermissive));
+        const unsigned junctionRef(getSupportCount(normalInfo.ref, pairWeight, isPermissive) +
+                                   getSupportCount(tumorInfo.ref, pairWeight, isPermissive));
+
+        altTotal += junctionAlt;
+        refTotal += junctionRef;
+    }
+
+    const unsigned allTotal(altTotal + refTotal);
+    if (allTotal == 0)
+    {
+        return 0;
+    }
+    return static_cast<double>(altTotal) / static_cast<double>(allTotal);
+}
+
+
+
+/// accumulate per-genotype log-likelihoods from one sample's fragment evidence
+///
+/// Every fragment in \p evidenceTrack that getRefAltFromFrag() reports as evaluated
+/// contributes log_sum(ref term, alt term) to each entry of \p loglhood. Fragments
+/// which are not evaluated leave \p loglhood untouched.
+///
+/// \param[in,out] loglhood one running log-likelihood per SOMATIC_GT state, incremented in place
+///
+static
+void
+computeSomaticSampleLoghood(
+    const float spanningPairWeight,
+    const SVEvidence::evidenceTrack_t& evidenceTrack,
+    const double somaticMutationFreq,
+    const double noiseMutationFreq,
+    const bool isPermissive,
+    const bool isTumor,
+    const ProbSet& refChimeraProb,
+    const ProbSet& altChimeraProb,
+    const ProbSet& refSplitMapProb,
+    const ProbSet& altSplitMapProb,
+    std::array<double,SOMATIC_GT::SIZE>& loglhood)
+{
+    // the semi-mapped power is only non-zero for the permissive tier of a non-tumor sample:
+    const double semiMappedPower( ((! isPermissive) || isTumor) ? 0. : 1. );
+
+    for (const auto& fragEntry : evidenceTrack)
+    {
+        AlleleLnLhood refAlleleLnLhood, altAlleleLnLhood;
+        bool isFirstReadEvaluated(true);
+        bool isSecondReadEvaluated(true);
+
+        const bool isFragEvaluated(
+            getRefAltFromFrag(spanningPairWeight, semiMappedPower, refChimeraProb, altChimeraProb,
+                              refSplitMapProb, altSplitMapProb, isPermissive, fragEntry.first, fragEntry.second,
+                              refAlleleLnLhood, altAlleleLnLhood, isFirstReadEvaluated, isSecondReadEvaluated));
+
+        // nothing to add if this fragment was not evaluated for pair or split support for either allele:
+        if (! isFragEvaluated) continue;
+
+        for (unsigned gtIndex(0); gtIndex<SOMATIC_GT::SIZE; ++gtIndex)
+        {
+            using namespace SOMATIC_GT;
+
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": starting gt: " << gtIndex << " " << label(gtIndex) << "\n";
+#endif
+
+            const index_t gtid(static_cast<index_t>(gtIndex));
+
+            const double refLnFragLhood(getFragLnLhood(refAlleleLnLhood, isFirstReadEvaluated, isSecondReadEvaluated));
+            const double altLnFragLhood(getFragLnLhood(altAlleleLnLhood, isFirstReadEvaluated, isSecondReadEvaluated));
+
+            // weight each allele likelihood by Pr[allele | G]
+            const double refLnLhood(refLnFragLhood + altLnCompFraction(gtid, somaticMutationFreq, noiseMutationFreq));
+            const double altLnLhood(altLnFragLhood + altLnFraction(gtid, somaticMutationFreq, noiseMutationFreq));
+
+            // marginalize over the two alleles to get this fragment's contribution:
+            const double fragLnLhood(log_sum(refLnLhood, altLnLhood));
+
+#ifdef DEBUG_SCORE
+            log_os << __FUNCTION__ << ": refLnFragLhood: " << refLnFragLhood << "\n";
+            log_os << __FUNCTION__ << ": altLnFragLhood: " << altLnFragLhood << "\n";
+            log_os << __FUNCTION__ << ": refLnLhood: " << refLnLhood << "\n";
+            log_os << __FUNCTION__ << ": altLnLhood: " << altLnLhood << "\n";
+            log_os << __FUNCTION__ << ": loghood delta: " << fragLnLhood << "\n";
+#endif
+
+            loglhood[gtIndex] += fragLnLhood;
+        }
+    }
+}
+
+
+
+/// score somatic specific components: writes the two-tier somatic quality score, its tier and the somatic filters to somaticInfo
+static
+void
+scoreSomaticSV(
+    const unsigned sampleCount, ///< asserted below to be exactly 2
+    const unsigned diploidSampleCount, ///< asserted below to be exactly 1
+    const CallOptionsSomatic& somaticOpt, ///< source of the filter thresholds and filter labels
+    const CallOptionsSomaticDeriv& somaticDopt, ///< source of the per-genotype log prior
+    const ChromDepthFilterUtil& dFilter, ///< per-chromosome max depth lookup used by the depth filter
+    const std::vector<JunctionCallInfo>& junctionData, ///< all junctions of this event; asserted non-empty
+    SVScoreInfoSomatic& somaticInfo) ///< output: somaticScore, somaticScoreTier and filters are set here
+{
+    // compute somatic score:
+    // per tier, a somatic posterior is built from the tumor + normal fragment likelihoods and the genotype prior,
+    // then combined with an independently estimated diploid posterior of the normal sample
+    assert(! junctionData.empty());
+    const bool isMJEvent(junctionData.size() > 1);
+
+    // somatic score is computed at a high stringency data tier (1) and low stringency tier (2), the min value is
+    // kept as the final reported quality:
+    static const unsigned tierCount(2);
+    int tierScore[tierCount] = { 0 , 0 };
+
+    // hard code 1 tumor - 1 normal for now, should be able to support multiple tumors in future:
+    assert(sampleCount==2);
+    assert(diploidSampleCount==1);
+    const unsigned normalSampleIndex(0);
+    const unsigned tumorSampleIndex(1);
+
+    /// for multi-junction events, use the largest largeNoiseSVPriorWeight value found over all junctions:
+    float largeNoiseWeight(0.f);
+    for (const JunctionCallInfo& junction : junctionData)
+    {
+        const SVCandidate& sv(junction.getSV());
+        const float weight(largeNoiseSVPriorWeight(sv));
+        if (weight > largeNoiseWeight) largeNoiseWeight = weight;
+    }
+
+    for (unsigned tierIndex(0); tierIndex<tierCount; ++tierIndex)
+    {
+        const bool isPermissive(tierIndex != 0); // tier index 0 is the strict tier, index 1 the permissive tier
+
+        std::array<double,SOMATIC_GT::SIZE> normalSomaticLhood;
+        std::array<double,SOMATIC_GT::SIZE> tumorSomaticLhood;
+        std::fill(normalSomaticLhood.begin(),normalSomaticLhood.end(),0);
+        std::fill(tumorSomaticLhood.begin(),tumorSomaticLhood.end(),0);
+
+        // estimate the somatic mutation rate using alternate allele freq from the tumor sample
+        const double somaticMutationFreq = estimateSomaticMutationFreq(tumorSampleIndex,junctionData, isPermissive);
+
+        // estimate the noise mutation rate using alternate allele freq from the tumor and normal samples
+        const double noiseMutationFreq = estimateNoiseMutationFreq(normalSampleIndex,tumorSampleIndex,junctionData, isPermissive);
+
+#ifdef DEBUG_SOMATIC_SCORE
+        log_os << __FUNCTION__ << ": somaticMutationFrequency: " << somaticMutationFreq << "\n";
+        log_os << __FUNCTION__ << ": noiseMutationFrequency: " << noiseMutationFreq << "\n";
+        log_os << __FUNCTION__ << ": largeNoiseWeight: " << largeNoiseWeight << "\n";
+#endif
+
+        /// TODO: find a better way to set this number from training data:
+        static const ProbSet chimeraProbDefaultSingleJunction(1e-4);
+        static const ProbSet chimeraProbDefaultMultiJunction(2e-5);
+        const ProbSet& chimeraProbDefault( isMJEvent ? chimeraProbDefaultMultiJunction : chimeraProbDefaultSingleJunction );
+
+        static const ProbSet chimeraProbPermissive(5e-6);
+        const ProbSet& chimeraProb( isPermissive ? chimeraProbPermissive : chimeraProbDefault );
+
+        /// use a constant mapping prob for now just to get the zero-th order concept into the model
+        /// that "reads are mismapped at a non-trivial rate"
+        /// TODO: experiment with per-read mapq values
+        ///
+        static const ProbSet refSplitMapProb(1e-6);
+
+        static const ProbSet altSplitMapProbDefault(1e-4);
+        static const ProbSet altSplitMapProbPermissive(1e-6);
+        const ProbSet& altSplitMapProb( isPermissive ? altSplitMapProbPermissive : altSplitMapProbDefault );
+
+        for (const JunctionCallInfo& junction : junctionData)
+        {
+            const SVEvidence& evidence(junction.getEvidence());
+            const float& spanningPairWeight(junction.getSpanningWeight());
+
+            // compute likelihood for the fragments from the tumor sample (always uses the default, tier-independent probabilities)
+            computeSomaticSampleLoghood(spanningPairWeight, evidence.samples[tumorSampleIndex], somaticMutationFreq, noiseMutationFreq,
+                                        isPermissive, true,
+                                        chimeraProbDefault, chimeraProbDefault,
+                                        refSplitMapProb, altSplitMapProbDefault, tumorSomaticLhood);
+
+            // compute likelihood for the fragments from the normal sample (somatic freq fixed to 0, alt probabilities depend on the tier)
+            computeSomaticSampleLoghood(spanningPairWeight, evidence.samples[normalSampleIndex], 0, noiseMutationFreq,
+                                        isPermissive, false,
+                                        chimeraProbDefault, chimeraProb,
+                                        refSplitMapProb, altSplitMapProb, normalSomaticLhood);
+        }
+
+        std::array<double,SOMATIC_GT::SIZE> somaticPprob;
+        for (unsigned gt(0); gt<SOMATIC_GT::SIZE; ++gt)
+        {
+            somaticPprob[gt] = tumorSomaticLhood[gt] + normalSomaticLhood[gt] + somaticDopt.logPrior(gt,largeNoiseWeight);
+        }
+
+        {
+            unsigned maxGt(0); // receives an index from normalize_ln_distro; not used afterwards
+            normalize_ln_distro(somaticPprob.begin(), somaticPprob.end(), maxGt);
+        }
+
+        // independently estimate diploid genotype:
+        std::array<double,DIPLOID_GT::SIZE> normalLhood;
+        std::fill(normalLhood.begin(),normalLhood.end(),0);
+        for (const JunctionCallInfo& junction : junctionData)
+        {
+            addDiploidLoglhood(junction.getSpanningWeight(), junction.getEvidence().samples[normalSampleIndex], normalLhood);
+        }
+
+        std::array<double,DIPLOID_GT::SIZE> normalPprob;
+        for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+        {
+            normalPprob[gt] = normalLhood[gt]; // uniform prior for now....
+        }
+
+        {
+            unsigned maxGt(0); // receives an index from normalize_ln_distro; not used afterwards
+            normalize_ln_distro(normalPprob.begin(), normalPprob.end(), maxGt);
+        }
+
+#ifdef DEBUG_SOMATIC_SCORE
+        for (unsigned gt(0); gt<SOMATIC_GT::SIZE; ++gt)
+        {
+            log_os << __FUNCTION__ << ": somatic gt/tumor_lhood/normal_lhood/prior/pprob: "
+                   << SOMATIC_GT::label(gt)
+                   << " " << tumorSomaticLhood[gt]
+                   << " " << normalSomaticLhood[gt]
+                   << " " << somaticDopt.logPrior(gt,largeNoiseWeight)
+                   << " " << somaticPprob[gt]
+                   << "\n";
+        }
+
+        for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+        {
+            log_os << __FUNCTION__ << ": diploid gt/lhood/pprob: "
+                   << DIPLOID_GT::label(gt)
+                   << " " << normalLhood[gt]
+                   << " " << normalPprob[gt]
+                   << "\n";
+        }
+#endif
+
+        const double nonsomaticProb(prob_comp(somaticPprob.begin(), somaticPprob.end(), SOMATIC_GT::SOM));
+        const double nonrefProb(prob_comp(normalPprob.begin(), normalPprob.end(), DIPLOID_GT::REF));
+
+        // not (somatic AND normal ref):
+        // (1-(1-a)(1-b)) -> a+b-(ab)
+        const double nonsomatic_ref_prob(nonsomaticProb+nonrefProb-(nonsomaticProb*nonrefProb));
+
+        tierScore[tierIndex]=error_prob_to_qphred(nonsomatic_ref_prob);
+
+#ifdef DEBUG_SOMATIC_SCORE
+        log_os << __FUNCTION__ << ": tier: " << tierIndex << " somatic score: " << tierScore[tierIndex] << "\n";
+#endif
+
+        // don't bother with tier2 if tier1 is too low (tierScore[1] then keeps its initial value of 0):
+        if (tierScore[tierIndex] <= 0) break;
+    }
+
+    somaticInfo.somaticScore=std::min(tierScore[0],tierScore[1]);
+
+    somaticInfo.somaticScoreTier = 0;
+    if (tierScore[1] > tierScore[0]) // NOTE(review): tier is set to 1 exactly when the reported minimum is tierScore[0] -- confirm this is the intended meaning of somaticScoreTier
+    {
+        somaticInfo.somaticScoreTier = 1;
+    }
+
+
+    //
+    // apply filters: max depth, minimum somatic score and MQ0 fraction; each inserts its label into somaticInfo.filters
+    //
+    {
+        const unsigned junctionCount(junctionData.size());
+
+        // apply high depth filter:
+        if (dFilter.isMaxDepthFilter())
+        {
+            unsigned filteredJunctionCount(0);
+            for (const JunctionCallInfo& junction : junctionData)
+            {
+                const SVScoreInfo& baseInfo(junction.getBaseInfo());
+                const SVCandidate& sv(junction.getSV());
+
+                // apply maxdepth filter if either of the breakpoints exceeds the maximum depth:
+                if ((baseInfo.bp1MaxDepth > dFilter.maxDepth(sv.bp1.interval.tid)) ||
+                    (baseInfo.bp2MaxDepth > dFilter.maxDepth(sv.bp2.interval.tid)))
+                {
+                    filteredJunctionCount++;
+                }
+            }
+
+            // apply max depth filter for an entire event if a majority of junctions meet the junction filter criteria:
+            if ((filteredJunctionCount*2) > junctionCount)
+            {
+                somaticInfo.filters.insert(somaticOpt.maxDepthFilterLabel);
+            }
+        }
+
+        if (somaticInfo.somaticScore < somaticOpt.minPassSomaticScore) // minimum somatic score filter
+        {
+            somaticInfo.filters.insert(somaticOpt.minSomaticScoreLabel);
+        }
+
+        // apply MQ0 filter (only junctions for which isSVBelowMinSize(sv,1000) is true are tested)
+        {
+            unsigned filteredJunctionCount(0);
+            for (const JunctionCallInfo& junction : junctionData)
+            {
+                const SVScoreInfo& baseInfo(junction.getBaseInfo());
+                const SVCandidate& sv(junction.getSV());
+
+                const bool isMQ0FilterSize(isSVBelowMinSize(sv,1000));
+                if (isMQ0FilterSize)
+                {
+                    // apply MQ0 filter for one junction if either breakend meets the filter criteria:
+                    if ((baseInfo.bp1MQ0Frac > somaticOpt.maxMQ0Frac) ||
+                        (baseInfo.bp2MQ0Frac > somaticOpt.maxMQ0Frac))
+                    {
+                        filteredJunctionCount++;
+                    }
+                }
+            }
+
+            // apply MQ0 filter for an entire event if a majority of junctions meet the junction filter criteria:
+            if ((filteredJunctionCount*2) > junctionCount)
+            {
+                somaticInfo.filters.insert(somaticOpt.maxMQ0FracLabel);
+            }
+        }
+    }
+}
+
+
+
+/// apply all scoring models relevant to this event
+///
+/// - tumor-only analysis: only the tumor model is scored (into modelScoreInfo.tumor)
+/// - otherwise: the diploid model is always scored, and the somatic model is scored in
+///   addition when \p isSomatic is set
+/// - when _isRNA is set, scoreRNASV is run last in either case
+///
+/// \param junctionData one entry for single-junction scoring, several for joint scoring
+/// \param modelScoreInfo receives the results of every model that is run
+///
+void
+SVScorer::
+computeAllScoreModels(
+    const bool isSomatic,
+    const bool isTumorOnly,
+    const std::vector<JunctionCallInfo>& junctionData,
+    SVModelScoreInfo& modelScoreInfo)
+{
+    if (isTumorOnly)
+    {
+        // pair the tumor-only options with the tumor depth filter (previously the diploid
+        // depth filter was passed here, leaving _dFilterTumor unused):
+        scoreTumorSV(_tumorOpt, _dFilterTumor, junctionData, modelScoreInfo.tumor);
+    }
+    else
+    {
+        scoreDiploidSV(_diploidOpt, _readScanner, _diploidDopt, _dFilterDiploid, junctionData, modelScoreInfo.diploid);
+
+        // score components specific to somatic model:
+        if (isSomatic)
+        {
+            scoreSomaticSV(_sampleCount,_diploidSampleCount,_somaticOpt, _somaticDopt, _dFilterSomatic, junctionData, modelScoreInfo.somatic);
+        }
+    }
+
+    if (_isRNA)
+    {
+        scoreRNASV(_diploidOpt, modelScoreInfo.base, modelScoreInfo.diploid);
+    }
+}
+
+
+
+/// gather supporting evidence and score every unfiltered junction of a candidate
+///
+/// Each unfiltered junction is first scored on its own; the results are written to
+/// \p mjModelScoreInfo, which is resized to one entry per junction of \p mjSV.
+/// When exactly two junctions are unfiltered they are additionally scored jointly
+/// into \p mjJointModelScoreInfo and \p isMJEvent is set to true; with exactly one
+/// unfiltered junction \p isMJEvent is set to false.
+///
+/// \param isJunctionFiltered junctions flagged true are skipped in both scoring passes
+///
+/// \throws illumina::common::LogicException if the unfiltered junction count is neither 1 nor 2
+///
+void
+SVScorer::
+scoreSV(
+    const SVCandidateSetData& svData,
+    const std::vector<SVCandidateAssemblyData>& mjAssemblyData,
+    const SVMultiJunctionCandidate& mjSV,
+    const std::vector<SVId>& mjSVId,
+    const std::vector<bool>& isJunctionFiltered,
+    const bool isSomatic,
+    const bool isTumorOnly,
+    std::vector<SVModelScoreInfo>& mjModelScoreInfo,
+    SVModelScoreInfo& mjJointModelScoreInfo,
+    bool& isMJEvent,
+    SupportSamples& svSupports)
+{
+    // scoring is roughly divided into two parts -- treating individual dna-junctions
+    // independently (the simpler call mechanism used the great majority of the time) and
+    // joint junction analysis for larger scale events
+    //
+    const unsigned junctionCount(mjSV.junction.size());
+    mjModelScoreInfo.resize(junctionCount);
+    std::vector<SVEvidence> junctionEvidence(junctionCount);
+    std::vector<float> junctionSpanningPairWeight(junctionCount);
+
+    for (unsigned jIndex(0); jIndex<junctionCount; ++jIndex)
+    {
+        mjModelScoreInfo[jIndex].setSampleCount(sampleCount(),diploidSampleCount());
+        junctionEvidence[jIndex].samples.resize(sampleCount());
+    }
+
+    mjJointModelScoreInfo.clear();
+
+    unsigned unfilteredJunctionCount(0);
+
+    std::vector<JunctionCallInfo> junctionData;
+
+    for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+    {
+        if (isJunctionFiltered[junctionIndex]) continue;
+
+#ifdef ANY_DEBUG_SCORE
+        log_os << __FUNCTION__ << ": Scoring single junction " << junctionIndex << "/" << junctionCount << "\n";
+#endif
+
+        unfilteredJunctionCount++;
+
+        const SVCandidateAssemblyData& assemblyData(mjAssemblyData[junctionIndex]);
+        const SVCandidate& sv(mjSV.junction[junctionIndex]);
+        const SVId& svId(mjSVId[junctionIndex]);
+        SVModelScoreInfo& modelScoreInfo(mjModelScoreInfo[junctionIndex]);
+
+        modelScoreInfo.clear();
+
+        // accumulate model-neutral evidence for each candidate (or its corresponding reference allele)
+        SVEvidence& evidence(junctionEvidence[junctionIndex]);
+        scoreSV(svData, assemblyData, isTumorOnly, sv, svId,
+                modelScoreInfo.base, evidence, svSupports);
+
+        // cache the spanning pair weight so that the joint scoring pass below can reuse it:
+        float& spanningPairWeight(junctionSpanningPairWeight[junctionIndex]);
+        spanningPairWeight=(getSpanningPairWeight(sv));
+
+        // score this junction on its own:
+        junctionData.resize(1);
+        junctionData[0].init(sv, evidence, modelScoreInfo.base, spanningPairWeight);
+
+        computeAllScoreModels(isSomatic, isTumorOnly, junctionData, modelScoreInfo);
+    }
+
+    //
+    // handle multi-junction case:
+    //
+    if (unfilteredJunctionCount == 1)
+    {
+        isMJEvent=false;
+    }
+    else if (unfilteredJunctionCount == 2)
+    {
+#ifdef ANY_DEBUG_SCORE
+        log_os << __FUNCTION__ << ": Scoring multi-junction " << junctionCount << "\n";
+#endif
+        isMJEvent=true;
+
+        // junctionData holds unfiltered junctions only, so it needs its own compact index:
+        // indexing it with junctionIndex would write out of bounds (and pull in evidence that
+        // was never gathered) whenever a filtered junction is present.
+        junctionData.resize(unfilteredJunctionCount);
+        unsigned unfilteredIndex(0);
+        for (unsigned junctionIndex(0); junctionIndex<junctionCount; ++junctionIndex)
+        {
+            if (isJunctionFiltered[junctionIndex]) continue;
+
+            junctionData[unfilteredIndex].init(
+                mjSV.junction[junctionIndex],
+                junctionEvidence[junctionIndex],
+                mjModelScoreInfo[junctionIndex].base,
+                junctionSpanningPairWeight[junctionIndex]);
+            ++unfilteredIndex;
+        }
+
+        computeAllScoreModels(isSomatic, isTumorOnly, junctionData, mjJointModelScoreInfo);
+    }
+    else
+    {
+        using namespace illumina::common;
+        std::ostringstream oss;
+        oss << "ERROR: unexpected junction count: " << unfilteredJunctionCount << ".\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+}
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorer.hh b/src/c++/lib/applications/GenerateSVCandidates/SVScorer.hh
new file mode 100644
index 0000000..1bd4652
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorer.hh
@@ -0,0 +1,292 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include "GSCOptions.hh"
+#include "JunctionCallInfo.hh"
+#include "SplitReadAlignment.hh"
+#include "SVEvidence.hh"
+#include "SVScorePairProcessor.hh"
+
+#include "assembly/AssembledContig.hh"
+#include "blt_util/qscore_snp.hh"
+#include "htsapi/bam_streamer.hh"
+#include "htsapi/bam_header_info.hh"
+#include "manta/ChromDepthFilterUtil.hh"
+#include "manta/SVCandidateAssemblyData.hh"
+#include "manta/SVCandidateSetData.hh"
+#include "manta/SVLocusScanner.hh"
+#include "manta/SVModelScoreInfo.hh"
+#include "manta/SVMultiJunctionCandidate.hh"
+#include "manta/SVScoreInfoSomatic.hh"
+
+#include <vector>
+#include <string>
+
+/// values derived once from CallOptionsShared: two qscore_snp objects built at construction time
+struct CallOptionsSharedDeriv
+{
+    explicit
+    CallOptionsSharedDeriv(
+        const CallOptionsShared& opt) :
+        refQ(opt.snpPrior), // constructed from the configured snpPrior
+        altQ(0) // constructed with a fixed argument of 0
+    {}
+
+    const qscore_snp refQ; ///< NOTE(review): qscore_snp comes from blt_util/qscore_snp.hh (not shown here); presumably used when scoring reference-allele reads -- confirm
+    const qscore_snp altQ; ///< presumably the alt-allele counterpart of refQ -- confirm
+};
+
+
+/// genotype priors of the diploid model, derived once from CallOptionsDiploid
+///
+/// HET takes the configured indel prior, HOM half of it and REF the remaining
+/// probability mass; logPrior holds the natural log of each entry of prior.
+///
+struct CallOptionsDiploidDeriv : private boost::noncopyable
+{
+    explicit
+    CallOptionsDiploidDeriv(
+        const CallOptionsDiploid& opt)
+    {
+        assert(opt.indelPrior < 0.5);
+
+        prior[DIPLOID_GT::HET] = opt.indelPrior;
+        prior[DIPLOID_GT::HOM] = opt.indelPrior/2;
+        prior[DIPLOID_GT::REF] = 1 - prior[DIPLOID_GT::HET] - prior[DIPLOID_GT::HOM];
+
+        // cache the log of every prior:
+        for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+        {
+            logPrior[gt] = std::log(prior[gt]);
+        }
+    }
+
+    /// prior probability per DIPLOID_GT state
+    std::array<float,DIPLOID_GT::SIZE> prior;
+
+    /// natural log of prior, per DIPLOID_GT state
+    std::array<float,DIPLOID_GT::SIZE> logPrior;
+};
+
+
+/// genotype priors of the somatic model, derived once from CallOptionsSomatic
+///
+/// The NOISE state prior depends on the event: logPrior() interpolates between the
+/// small and large noise priors according to the weight supplied by the caller.
+///
+struct CallOptionsSomaticDeriv : private boost::noncopyable
+{
+    explicit
+    CallOptionsSomaticDeriv(
+        const CallOptionsSomatic& opt)
+    {
+        assert(opt.germlineSVPrior < 0.5);
+
+        prior[SOMATIC_GT::SOM] = opt.somaticSVPrior;
+        prior[SOMATIC_GT::NOISE] = opt.largeNoiseSVPrior;
+
+        prior[SOMATIC_GT::HET] = opt.germlineSVPrior;
+        prior[SOMATIC_GT::HOM] = opt.germlineSVPrior/2;
+
+        // this assumes all states independent, and somatic and noise only occur on germline ref GT background:
+        const float nonref(prior[SOMATIC_GT::SOM]+prior[SOMATIC_GT::NOISE]+prior[SOMATIC_GT::HET]+prior[SOMATIC_GT::HOM]);
+        assert(nonref>=0 && nonref<=1);
+        prior[SOMATIC_GT::REF] = 1 - nonref;
+
+        // cache the log of every fixed prior:
+        for (unsigned gt(0); gt<SOMATIC_GT::SIZE; ++gt)
+        {
+            _logPrior[gt] = std::log(prior[gt]);
+        }
+
+        // the two noise priors (and their logs) are kept separately for logPrior():
+        smallNoisePrior = opt.smallNoiseSVPrior;
+        largeNoisePrior = opt.largeNoiseSVPrior;
+        logSmallNoisePrior = std::log(opt.smallNoiseSVPrior);
+        logLargeNoisePrior = std::log(opt.largeNoiseSVPrior);
+    }
+
+    /// log prior of genotype state \p gt
+    ///
+    /// \param largeNoiseWeight asserted to be in [0,1]; only used for the NOISE state, where
+    ///        0 selects the small noise prior, 1 the large noise prior, and values in between
+    ///        a linear mix of the two
+    float
+    logPrior(
+        const unsigned gt,
+        const float largeNoiseWeight) const
+    {
+        assert(largeNoiseWeight >= 0. && largeNoiseWeight <= 1.);
+
+        if (gt == SOMATIC_GT::NOISE)
+        {
+            // use the cached logs at the interval ends:
+            if (largeNoiseWeight <= 0.) return logSmallNoisePrior;
+            if (largeNoiseWeight >= 1.) return logLargeNoisePrior;
+
+            return std::log((1-largeNoiseWeight)*smallNoisePrior + largeNoiseWeight*largeNoisePrior);
+        }
+
+        return _logPrior[gt];
+    }
+
+private:
+    std::array<float,SOMATIC_GT::SIZE> prior;
+    std::array<float,SOMATIC_GT::SIZE> _logPrior;
+
+    float smallNoisePrior;
+    float largeNoisePrior;
+    float logSmallNoisePrior;
+    float logLargeNoisePrior;
+};
+
+
+/// scores SV candidates: gathers read evidence per junction and applies the diploid, somatic, tumor-only and RNA scoring models
+struct SVScorer
+{
+    SVScorer(
+        const GSCOptions& opt,
+        const SVLocusScanner& readScanner, // NOTE(review): _readScanner is a reference member, so this object presumably has to outlive the scorer -- confirm in the constructor
+        const bam_header_info& header);
+
+    /// gather supporting evidence and generate:
+    /// 1) diploid quality score and genotype for SV candidate
+    /// 2) somatic quality score
+    void
+    scoreSV(
+        const SVCandidateSetData& svData,
+        const std::vector<SVCandidateAssemblyData>& mjAssemblyData, ///< indexed by junction
+        const SVMultiJunctionCandidate& mjSV, ///< mjSV.junction holds the junctions to score
+        const std::vector<SVId>& mjSVId, ///< indexed by junction
+        const std::vector<bool>& isJunctionFiltered, ///< junctions flagged true are not scored
+        const bool isSomatic, ///< also run the somatic model (has no effect when isTumorOnly is set)
+        const bool isTumorOnly, ///< run the tumor model instead of the diploid/somatic models
+        std::vector<SVModelScoreInfo>& mjModelScoreInfo, ///< output: resized to one entry per junction
+        SVModelScoreInfo& mjJointModelScoreInfo, ///< output: cleared, then filled only when two junctions are scored jointly
+        bool& isMJEvent, ///< output: true when two unfiltered junctions were scored jointly
+        SupportSamples& svSupports);
+
+    typedef std::shared_ptr<SVScorePairProcessor> pairProcPtr; ///< shared handle to a pair-support processor
+    typedef std::shared_ptr<bam_streamer> streamPtr; ///< shared handle to a BAM stream
+
+    unsigned
+    sampleCount() const // returns _sampleCount
+    {
+        return _sampleCount;
+    }
+
+    unsigned
+    diploidSampleCount() const // returns _diploidSampleCount
+    {
+        return _diploidSampleCount;
+    }
+
+    const std::vector<std::string>&
+    sampleNames() const // returns a reference to _sampleNames, valid for the lifetime of this object
+    {
+        return _sampleNames;
+    }
+
+private:
+
+    void
+    processExistingAltPairInfo( // NOTE(review): undocumented and defined outside this header -- presumably folds already-collected alt pair observations from svData into evidence; confirm
+        const PairOptions& pairOpt,
+        const SVCandidateSetData& svData,
+        const SVCandidate& sv,
+        const SVId& svId,
+        SVEvidence& evidence,
+        SupportSamples& svSupports);
+
+    /// estimate pair support for an sv candidate
+    /// restricted to simple indel style svs
+    void
+    getSVAltPairSupport(
+        const PairOptions& pairOpt,
+        const SVCandidateAssemblyData& assemblyData,
+        const SVCandidate& sv,
+        SVEvidence& evidence,
+        std::vector<pairProcPtr>& pairProcList);
+
+    /// find spanning read support for the reference allele for sv candidate
+    void
+    getSVRefPairSupport(
+        const PairOptions& pairOpt,
+        const SVCandidate& sv,
+        SVEvidence& evidence,
+        std::vector<pairProcPtr>& pairProcList);
+
+    /// find paired read support for ref and alt alleles
+    void
+    getSVPairSupport(
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& assemblyData,
+        const SVCandidate& sv,
+        const SVId& svId,
+        SVEvidence& evidence,
+        SupportSamples& svSupports);
+
+    /// find split read support for ref and alt alleles
+    void
+    getSVSplitReadSupport(
+        const SVCandidateAssemblyData& assemblyData,
+        const SVCandidate& sv,
+        const SVId& svId,
+        SVScoreInfo& ssInfo,
+        SVEvidence& evidence,
+        SupportSamples& svSupports);
+
+    /// determine maximum depth and MQ0 frac in region around breakend of normal sample
+    void
+    getBreakendMaxMappedDepthAndMQ0(
+        const bool isMaxDepth,
+        const bool isTumorOnly,
+        const double cutoffDepth,
+        const SVBreakend& bp,
+        unsigned& maxDepth, ///< output
+        float& MQ0Frac); ///< output
+
+    /// apply all scoring models relevant to this event:
+    void
+    computeAllScoreModels(
+        const bool isSomatic,
+        const bool isTumorOnly,
+        const std::vector<JunctionCallInfo>& junctionData,
+        SVModelScoreInfo& modelScoreInfo);
+
+    /// shared information gathering steps of all scoring models (single-junction overload called by the public scoreSV)
+    void
+    scoreSV(
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& assemblyData,
+        const bool isTumorOnly,
+        const SVCandidate& sv,
+        const SVId& svId,
+        SVScoreInfo& ssInfo,
+        SVEvidence& evidence,
+        SupportSamples& svSupports);
+
+    const std::vector<bool> _isAlignmentTumor;
+    const bool _isRNA; ///< when true, computeAllScoreModels additionally runs scoreRNASV
+    const CallOptionsShared _callOpt;
+    const CallOptionsSharedDeriv _callDopt;
+    const CallOptionsDiploid _diploidOpt;
+    const CallOptionsDiploidDeriv _diploidDopt;
+    const ReadScannerOptions _scanOpt;
+    const SVRefinerOptions _refineOpt;
+    const CallOptionsSomatic _somaticOpt;
+    const CallOptionsSomaticDeriv _somaticDopt;
+    const CallOptionsTumor _tumorOpt;
+    const ChromDepthFilterUtil _dFilterDiploid; ///< depth filter handed to scoreDiploidSV
+    const ChromDepthFilterUtil _dFilterSomatic; ///< depth filter handed to scoreSomaticSV
+    const ChromDepthFilterUtil _dFilterTumor; ///< presumably the depth filter for the tumor-only model -- confirm
+    const SVLocusScanner& _readScanner; ///< non-owning reference
+
+    std::vector<streamPtr> _bamStreams;
+
+    unsigned _sampleCount; ///< value returned by sampleCount()
+    unsigned _diploidSampleCount; ///< value returned by diploidSampleCount()
+    std::vector<std::string> _sampleNames; ///< value returned by sampleNames()
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorerPair.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorerPair.cpp
new file mode 100644
index 0000000..46c4188
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorerPair.cpp
@@ -0,0 +1,668 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#include "SVScorer.hh"
+#include "SVScorePairAltProcessor.hh"
+#include "SVScorePairRefProcessor.hh"
+
+#include "common/Exceptions.hh"
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/bam_streamer.hh"
+#include "htsapi/bam_record_util.hh"
+#include "manta/SVCandidateUtil.hh"
+#include "svgraph/GenomeIntervalUtil.hh"
+
+#include <iostream>
+#include <sstream>
+#include <string>
+
+/// standard debug output for this file:
+//#define DEBUG_PAIR
+
+/// ridiculous debug output for this file:
+//#define DEBUG_MEGAPAIR
+
+//#define DEBUG_SUPPORT
+
+#if defined(DEBUG_PAIR) || defined(DEBUG_SUPPORT)
+#include "blt_util/log.hh"
+#endif
+
+/// Scan every bam once per merged search interval and hand each read which passes the shared and
+/// per-processor filters to the pair processors whose own search interval was merged into that scan.
+static
+void
+processBamProcList(
+    const std::vector<SVScorer::streamPtr>& bamList,
+    const SVId& svId,
+    std::vector<SVScorer::pairProcPtr>& pairProcList,
+    SupportSamples& svSupports)
+{
+    const unsigned bamCount(bamList.size());
+    const unsigned bamProcCount(pairProcList.size());
+
+    for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+    {
+        // get the minimum set of scan intervals (this should almost always be 1!)
+        // intervalProcs[i] is the pairProcList index which supplied scanIntervals[i] before compression, so
+        // procs skipped for an empty interval cannot shift the interval-to-proc mapping used below.
+        // NOTE(review): assumes intervalCompressor() returns one merged-interval index per input interval -- confirm
+        std::vector<GenomeInterval> scanIntervals;
+        std::vector<unsigned> intervalProcs;
+        for (unsigned procIndex(0); procIndex<bamProcCount; ++procIndex)
+        {
+            const GenomeInterval& interval(pairProcList[procIndex]->nextBamIndex(bamIndex));
+            if (interval.range.size() < 1) continue;
+            scanIntervals.push_back(interval);
+            intervalProcs.push_back(procIndex);
+        }
+        const std::vector<unsigned> intervalMap(intervalCompressor(scanIntervals));
+
+        bam_streamer& bamStream(*bamList[bamIndex]);
+        SupportFragments& svSupportFrags(svSupports.getSupportFragments(bamIndex));
+
+        const unsigned intervalCount(scanIntervals.size());
+        for (unsigned intervalIndex(0); intervalIndex<intervalCount; ++intervalIndex)
+        {
+            const GenomeInterval& scanInterval(scanIntervals[intervalIndex]);
+            if (scanInterval.range.begin_pos() >= scanInterval.range.end_pos()) continue;
+
+            // set bam stream to new search interval:
+            bamStream.set_new_region(scanInterval.tid, scanInterval.range.begin_pos(), scanInterval.range.end_pos());
+
+            /// define the procs we're going to handle in this interval:
+            std::vector<unsigned> targetProcs;
+            for (unsigned mapIndex(0); mapIndex<intervalMap.size(); ++mapIndex)
+            {
+                if (intervalMap[mapIndex] == intervalIndex)
+                {
+                    targetProcs.push_back(intervalProcs[mapIndex]);
+                }
+            }
+
+            while (bamStream.next())
+            {
+                const bam_record& bamRead(*(bamStream.get_record_ptr()));
+
+                /// this filter is common to all targetProcs:
+                if (SVScorer::pairProcPtr::element_type::isSkipRecordCore(bamRead)) continue;
+
+                for (const unsigned procIndex : targetProcs)
+                {
+                    SVScorer::pairProcPtr& bpp(pairProcList[procIndex]);
+
+                    if (bpp->isSkipRecord(bamRead)) continue;
+                    bpp->processClearedRecord(svId, bamRead, svSupportFrags);
+                }
+            }
+        }
+    }
+}
+
+
+/// append two alt-allele pair processors to \p pairProcList, differing only in their boolean argument (true first, then false)
+void
+SVScorer::
+getSVAltPairSupport(
+    const PairOptions& pairOpt,
+    const SVCandidateAssemblyData& assemblyData,
+    const SVCandidate& sv,
+    SVEvidence& evidence,
+    std::vector<pairProcPtr>& pairProcList)
+{
+    const pairProcPtr altProcs[] = {
+        pairProcPtr(new SVScorePairAltProcessor(_scanOpt, _refineOpt, _isAlignmentTumor, _readScanner, pairOpt, assemblyData, sv, true, evidence)),
+        pairProcPtr(new SVScorePairAltProcessor(_scanOpt, _refineOpt, _isAlignmentTumor, _readScanner, pairOpt, assemblyData, sv, false, evidence))
+    };
+    pairProcList.insert(pairProcList.end(), altProcs, altProcs+2); // both processors exist before either is appended
+}
+
+
+/// append two reference-allele pair processors to \p pairProcList, differing only in their boolean argument (true first, then false)
+void
+SVScorer::
+getSVRefPairSupport(
+    const PairOptions& pairOpt,
+    const SVCandidate& sv,
+    SVEvidence& evidence,
+    std::vector<pairProcPtr>& pairProcList)
+{
+    const pairProcPtr refProcs[] = {
+        pairProcPtr(new SVScorePairRefProcessor(_isAlignmentTumor, _readScanner, pairOpt, sv, true, evidence)),
+        pairProcPtr(new SVScorePairRefProcessor(_isAlignmentTumor, _readScanner, pairOpt, sv, false, evidence))
+    };
+    pairProcList.insert(pairProcList.end(), refProcs, refProcs+2); // both processors exist before either is appended
+}
+
+
+/// reference span, strand and length recorded for one read of a pair
+struct SpanReadInfo
+{
+    /// chromosome id plus reference range covered by the read
+    GenomeInterval interval;
+
+    /// a default-constructed record is on the forward strand
+    bool isFwdStrand = true;
+
+    /// a default-constructed record has zero read length
+    unsigned readSize = 0;
+};
+
+
+/// fill \p local from the alignment of \p localRead, and approximate \p remote (its mate) from the mate fields of the same record
+static
+void
+getFragInfo(
+    const bam_record& localRead,
+    SpanReadInfo& local,
+    SpanReadInfo& remote)
+{
+    using namespace ALIGNPATH;
+
+    // local read: strand, size and chromosome are taken directly from the record
+    local.isFwdStrand = localRead.is_fwd_strand();
+    local.readSize = localRead.read_size();
+    local.interval.tid = localRead.target_id();
+    const pos_t localBeginPos(localRead.pos()-1); // NOTE(review): presumably converts a 1-indexed pos() to 0-indexed -- confirm
+
+    // get cigar, so that the end position reflects the reference length of the alignment:
+    path_t localPath;
+    bam_cigar_to_apath(localRead.raw_cigar(), localRead.n_cigar(), localPath);
+
+    const pos_t localEndPos(localBeginPos+apath_ref_length(localPath));
+
+    local.interval.range.set_range(localBeginPos,localEndPos);
+
+    // remote read: only the mate fields of localRead are used here, and the mate's size is copied from the local read
+    remote.isFwdStrand = localRead.is_mate_fwd_strand();
+    remote.readSize = local.readSize;
+    remote.interval.tid = localRead.mate_target_id();
+    const pos_t remoteBeginPos(localRead.mate_pos()-1);
+
+    // approximate end-point of remote read, using the local read size in place of the mate's reference length:
+    const pos_t remoteEndPos(remoteBeginPos+localRead.read_size());
+
+    remote.interval.range.set_range(remoteBeginPos,remoteEndPos);
+}
+
+
+/// fill in SpanReadInfo for both reads of \p pair, as accurately as possible depending on
+/// whether one or both of the read pair's bam records have been found; a read whose own record
+/// is missing is approximated from the mate fields of the other read's record:
+static
+void
+getFragInfo(
+    const SVCandidateSetSequenceFragment& pair,
+    SpanReadInfo& read1,
+    SpanReadInfo& read2)
+{
+    using namespace ALIGNPATH;
+
+    if (pair.read1.isSet())
+    {
+        getFragInfo(pair.read1.bamrec, read1, read2);
+
+        if (pair.read2.isSet())
+        {
+            // both records found: replace the approximated read2 size and end position with values from read2 itself
+            const bam_record& bamRead2(pair.read2.bamrec);
+
+            read2.readSize = bamRead2.read_size();
+
+            // get cigar:
+            path_t apath2;
+            bam_cigar_to_apath(bamRead2.raw_cigar(), bamRead2.n_cigar(), apath2);
+
+            read2.interval.range.set_end_pos(read2.interval.range.begin_pos() + apath_ref_length(apath2)); // begin pos still comes from read1's mate fields
+        }
+    }
+    else if (pair.read2.isSet())
+    {
+        getFragInfo(pair.read2.bamrec, read2, read1); // only read2 found: read1 is the approximated mate
+    }
+    else
+    {
+        assert(false && "Neither fragment read found"); // compiled out when NDEBUG is defined, leaving read1/read2 unmodified
+    }
+}
+
+
+
+/// read pairs are abstracted to two terminals for the purpose of
+/// fragment size estimation in the context of the alternate allele:
+/// tid+pos represent one of the two extreme ends of the fragment in
+/// genomic chromosome+position coordinates
+///
+struct SpanTerminal
+{
+    int32_t tid = 0; // chromosome id of the read this terminal was derived from
+    pos_t pos = 0; // getTerminal() sets this to the read's begin pos if forward-strand, else its end pos
+    bool isFwdStrand = true; // strand of the read this terminal was derived from
+    unsigned readSize = 0; // length of the read this terminal was derived from
+};
+
+
+#ifdef DEBUG_PAIR
+/// debug-only single-line summary of a SpanTerminal
+static
+std::ostream&
+operator<<(std::ostream& os, const SpanTerminal& st)
+{
+    return os << "tid: " << st.tid
+              << " pos: "<< st.pos
+              << " isFwdStrand: " << st.isFwdStrand
+              << " readSize: " << st.readSize;
+}
+#endif
+
+/// convert SpanReadInfo to SpanTerminal: the terminal position is the begin of the read's
+/// reference range for a forward-strand read, and the end of that range otherwise
+static
+void
+getTerminal(
+    const SpanReadInfo& rinfo,
+    SpanTerminal& fterm)
+{
+    const auto& readRange(rinfo.interval.range);
+    fterm.pos = ( rinfo.isFwdStrand ? readRange.begin_pos() : readRange.end_pos() );
+    fterm.isFwdStrand = rinfo.isFwdStrand;
+    fterm.readSize = rinfo.readSize;
+    fterm.tid = rinfo.interval.tid;
+}
+
+
+/// throw a LogicException whose message combines \p errorMsg with the candidate sv and the read pair
+static
+void
+pairError(
+    const SVCandidate& sv,
+    const SVCandidateSetSequenceFragment& pair,
+    const char* errorMsg)
+{
+    using namespace illumina::common;
+
+    std::ostringstream report;
+    report << "ERROR: " << errorMsg << '\n';
+    report << "\tcandidate-sv: " << sv;
+    report << "\tread-pair: " << pair << '\n';
+    const std::string message(report.str());
+    BOOST_THROW_EXCEPTION(LogicException(message));
+}
+
+
+/// double check that a read-pair supports an sv, and if so what is the fragment length prob?
+/// note this operation includes matching fragment r1/r2 to sv bp1/bp2, if applicable
+/// isFragSupportSV/fragProb are reset to false/0 on entry and keep those values on any early return
+/// with isStrictMatch, a chromosome, strand or position mismatch between fragment and sv throws via pairError() instead of returning
+static
+void
+getFragProb(
+    const PairOptions& pairOpt,
+    const SVCandidate& sv,
+    const SVCandidateSetSequenceFragment& pair,
+    const SizeDistribution& fragDistro,
+    const bool isStrictMatch,
+    bool& isFragSupportSV,
+    float& fragProb)
+{
+#ifdef DEBUG_PAIR
+    static const std::string logtag("getFragProb: ");
+#endif
+
+    isFragSupportSV=false;
+    fragProb=0.;
+
+    SpanReadInfo read1;
+    SpanReadInfo read2;
+    getFragInfo(pair, read1, read2);
+
+    // define the end-points of fragment:
+    SpanTerminal frag1;
+    getTerminal(read1,frag1);
+
+    SpanTerminal frag2;
+    getTerminal(read2,frag2);
+
+    const bool isSameFragTid(frag1.tid == frag2.tid);
+    const bool isSameBpTid(sv.bp1.interval.tid == sv.bp2.interval.tid);
+
+    // for strictMatch this must be true (usually this means anom read pairs)
+    // but this assertion won't necessarily hold of split reads, etc...
+    if (isSameFragTid != isSameBpTid)
+    {
+        if (! isStrictMatch) return;
+        pairError(sv,pair,"Can't resolve fragment/sv chromosome pair(s)");
+    }
+
+    const pos_t bp1pos(sv.bp1.interval.range.center_pos());
+    const pos_t bp2pos(sv.bp2.interval.range.center_pos());
+
+    // match bp to frag: decide whether frag1/frag2 must be swapped so that frag1 pairs with bp1 and frag2 with bp2
+    bool isBpFragReversed(false);
+
+    if (frag1.tid != sv.bp1.interval.tid)
+    {
+        isBpFragReversed=true;
+    }
+    else if (frag1.isFwdStrand != (sv.bp1.state == SVBreakendState::RIGHT_OPEN) )
+    {
+        isBpFragReversed=true;
+    }
+    else if (frag1.isFwdStrand == frag2.isFwdStrand)
+    {
+        // order inversion/complex SV breakends
+        if (isSameFragTid)
+        {
+            if ((frag1.pos < frag2.pos) != (bp1pos < bp2pos))
+            {
+                if (frag1.pos != frag2.pos)
+                {
+                    isBpFragReversed=true;
+                }
+            }
+        }
+    }
+
+    if (isBpFragReversed)
+    {
+        std::swap(frag1,frag2);
+#ifdef DEBUG_PAIR
+        log_os << logtag << "swapping fragments\n";
+#endif
+    }
+
+#ifdef DEBUG_PAIR
+    log_os << logtag << "pair: " << pair << "\n";
+    log_os << logtag << "sv: " << sv << "\n";
+    log_os << logtag << "frag1: " << frag1 << "\n";
+    log_os << logtag << "frag2: " << frag2 << "\n";
+#endif
+
+    // QC the frag/bp matchup (after any swap); when both checks of a branch fail, the bp2 message replaces the bp1 message:
+    {
+        std::string errorMsg;
+        if (frag1.tid != frag2.tid)
+        {
+            if (frag1.tid != sv.bp1.interval.tid)
+            {
+                errorMsg = "Can't match evidence read chrom to sv-candidate bp1.";
+            }
+            if (frag2.tid != sv.bp2.interval.tid)
+            {
+                errorMsg = "Can't match evidence read chrom to sv-candidate bp2.";
+            }
+        }
+        else if (frag1.isFwdStrand != frag2.isFwdStrand)
+        {
+            if ( frag1.isFwdStrand != (sv.bp1.state == SVBreakendState::RIGHT_OPEN) )
+            {
+                errorMsg = "Can't match evidence read strand to sv-candidate bp1";
+            }
+            if ( frag2.isFwdStrand != (sv.bp2.state == SVBreakendState::RIGHT_OPEN) )
+            {
+                errorMsg = "Can't match evidence read strand to sv-candidate bp2";
+            }
+        }
+        else
+        {
+            if (isSameFragTid)
+            {
+                if ( (frag1.pos < frag2.pos) != (bp1pos < bp2pos) )
+                {
+                    if (frag1.pos != frag2.pos)
+                    {
+                        errorMsg = "Can't match read pair positions to sv-candidate.";
+                    }
+                }
+            }
+        }
+
+        if (! errorMsg.empty())
+        {
+            if (! isStrictMatch) return;
+            pairError(sv,pair,errorMsg.c_str());
+        }
+    }
+
+    pos_t frag1Size(bp1pos-frag1.pos); // forward strand: center - pos; the sign is flipped below for the reverse strand
+    if (! frag1.isFwdStrand) frag1Size *= -1;
+
+    pos_t frag2Size(bp2pos-frag2.pos); // same signed distance for the second terminal relative to bp2
+    if (! frag2.isFwdStrand) frag2Size *= -1;
+
+#ifdef DEBUG_PAIR
+    log_os << logtag << "frag1size,frag2size: " << frag1Size << " " << frag2Size << "\n";
+#endif
+
+    if (frag1Size < pairOpt.minFragSupport) return; // too little support on the bp1 side: no support, fragProb stays 0
+    if (frag2Size < pairOpt.minFragSupport) return; // too little support on the bp2 side: no support, fragProb stays 0
+
+    fragProb=fragDistro.cdf(frag1Size+frag2Size); // the two per-breakend distances are summed into one fragment size
+#ifdef DEBUG_PAIR
+    log_os << logtag << "cdf: " << fragProb << " final: " << std::min(fragProb, (1-fragProb)) << "\n";
+#endif
+    fragProb = std::min(fragProb, (1-fragProb)); // smaller of cdf and 1-cdf, so both unusually short and unusually long fragments score low
+
+    /// TODO: any cases where fragProb is 0 or extremely small should be some
+    /// sort of multi-SV error artifact (like a large CIGAR indel in one of the
+    /// reads of the pair) try to improve this case -- ideally we can account
+    /// for such events.
+    if (fragProb >= pairOpt.minFragProb)
+    {
+        isFragSupportSV = true;
+    }
+
+#ifdef DEBUG_PAIR
+    log_os << logtag << "isSupportSV: " << isFragSupportSV << "\n";
+#endif
+}
+
+/// count the read pairs supporting the alternate allele in each sample, using data we already produced during candidate generation:
+/// for each fragment linked to \p sv, per-read evidence is recorded, then getFragProb() decides whether the fragment supports the sv;
+/// supporting fragments are flagged on both breakends of the alt allele in \p evidence and registered as spanning support in \p svSupports
+void
+SVScorer::
+processExistingAltPairInfo(
+    const PairOptions& pairOpt,
+    const SVCandidateSetData& svData,
+    const SVCandidate& sv,
+    const SVId& svId,
+    SVEvidence& evidence,
+    SupportSamples& svSupports)
+{
+    const unsigned minMapQ(_readScanner.getMinMapQ());
+    const unsigned minTier2MapQ(_readScanner.getMinTier2MapQ());
+
+    const unsigned bamCount(_bamStreams.size());
+    for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+    {
+        const SizeDistribution& fragDistro(_readScanner.getFragSizeDistro(bamIndex));
+        SupportFragments& svSupportFrags(svSupports.getSupportFragments(bamIndex));
+
+        const SVCandidateSetSequenceFragmentSampleGroup& svDataGroup(svData.getDataGroup(bamIndex));
+        for (const SVCandidateSetSequenceFragment& fragment : svDataGroup)
+        {
+            // at least one non-supplemental read of the pair must have been found to use this pipeline:
+            if (! (fragment.read1.isSet() || fragment.read2.isSet())) continue;
+
+            // sanity check of read pairs
+            if (! fragment.checkReadPair()) continue;
+
+            // is this read pair associated with this candidateIndex? (each read fragment can be associated with multiple candidates)
+            unsigned linkIndex(0); // position within fragment.svLink of the association to this sv, once found
+            {
+                bool isIndexFound(false);
+                for (const SVSequenceFragmentAssociation& sva : fragment.svLink)
+                {
+                    if (sv.candidateIndex == sva.index)
+                    {
+                        isIndexFound = true;
+                        break;
+                    }
+                    linkIndex++;
+                }
+
+                if (! isIndexFound) continue;
+            }
+            assert(fragment.svLink.size() > linkIndex);
+
+            const bool isPairType(SVEvidenceType::isPairType(fragment.svLink[linkIndex].evtype));
+
+            // if the evidence comes from a read fragment observation, a very strict matching criteria
+            // is enforced between this pair and the SV candidate. If the read pair association comes from
+            // a CIGAR string for instance, the fragment will not necessarily support the candidate
+            //
+            const bool isStrictMatch(isPairType);
+
+            const std::string& qname(fragment.qname());
+
+#ifdef DEBUG_PAIR
+            log_os << __FUNCTION__ << ": Finding alt fragment evidence for svIndex: " << sv.candidateIndex << " bam-fragment: " << fragment << "\n";
+#endif
+
+            SVFragmentEvidence& fragEvidence(evidence.getSampleEvidence(bamIndex)[qname]); // per-sample evidence is keyed by read name
+            SVFragmentEvidenceAllele& alt(fragEvidence.alt);
+
+            static const bool isShadow(false);
+            if (fragment.read1.isSet())
+            {
+                setReadEvidence(minMapQ, minTier2MapQ, fragment.read1.bamrec, isShadow, fragEvidence.read1);
+            }
+
+            if (fragment.read2.isSet())
+            {
+                setReadEvidence(minMapQ, minTier2MapQ, fragment.read2.bamrec, isShadow, fragEvidence.read2);
+            }
+
+            /// get fragment prob, and possibly withdraw fragment support based on refined sv breakend coordinates:
+            bool isFragSupportSV(false);
+            float fragProb(0);
+            getFragProb(pairOpt, sv, fragment, fragDistro, isStrictMatch, isFragSupportSV, fragProb);
+
+            if (! isFragSupportSV)
+            {
+#ifdef DEBUG_PAIR
+                log_os << __FUNCTION__ << ": no frag support!\n";
+#endif
+                continue;
+            }
+
+            /// TODO: if fragProb is zero this should be a bug -- follow-up to see if we can make this an assert(fragProb > 0.) instead
+            if (fragProb <= 0.)
+            {
+#ifdef DEBUG_PAIR
+                log_os << __FUNCTION__ << ": Fragment with fragProb=0! " << sv.candidateIndex << "  bam-fragment: " << fragment << "\n";
+#endif
+                continue;
+            }
+
+            // for all large spanning events -- we don't test for pair support of the two breakends separately -- this could be
+            // beneficial if there was an unusually large insertion associated with the event. For now we approximate that
+            // these events will mostly not have very large insertions.
+            //
+            alt.bp1.isFragmentSupport = true;
+            alt.bp1.fragLengthProb = fragProb;
+
+            alt.bp2.isFragmentSupport = true;
+            alt.bp2.fragLengthProb = fragProb;
+
+            SupportFragment& supportFrag(svSupportFrags.getSupportFragment(fragment));
+            supportFrag.addSpanningSupport(svId.localId);
+#ifdef DEBUG_SUPPORT
+            log_os << __FUNCTION__ << "  Adding read support (spanning): "
+                   << fragment.qname() << "\n" << supportFrag;
+#endif
+        }
+    }
+}
+
+/// gather alt and ref read-pair support for \p sv: alt support is either reused from candidate generation data or queued for a bam rescan,
+/// ref support is always queued, and all queued pair processors are then run over the bams by processBamProcList()
+void
+SVScorer::
+getSVPairSupport(
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& assemblyData,
+    const SVCandidate& sv,
+    const SVId& svId,
+    SVEvidence& evidence,
+    SupportSamples& svSupports)
+{
+    const PairOptions pairOpt(_isRNA);
+
+#ifdef DEBUG_PAIR
+    static const std::string logtag("getSVPairSupport: ");
+    log_os << logtag << "starting alt pair search for sv: " << sv << "\n";
+#endif
+
+    std::vector<pairProcPtr> pairProcList;
+
+    bool isAltPairFound(false);
+    if (assemblyData.isCandidateSpanning && (sv.isImprecise() || assemblyData.isSpanning))
+    {
+        bool isIncompleteAltPairInfo = false; // set for precise deletions small enough that existing pair data may be incomplete
+        if (! sv.isImprecise())
+        {
+            const unsigned deleteSize(getDeleteSize(sv));
+
+            // this size represents the outer edge of variant size above which we expect
+            // that the previous candidate generation pair discovery did not suffer dropouts
+            // due to normal pair distro sizes
+            static const double insertSizeFactor(2);
+            const unsigned maxClosePairSize(_readScanner.getExtremeFifthRange().max * insertSizeFactor);
+            isIncompleteAltPairInfo = ((deleteSize>0) && (deleteSize<=maxClosePairSize));
+        }
+
+        if (! isIncompleteAltPairInfo)
+        {
+            // count the read pairs supporting the alternate allele in each sample
+            // using data we already produced during candidate generation:
+            //
+            processExistingAltPairInfo(pairOpt, svData, sv, svId, evidence, svSupports);
+            isAltPairFound = true;
+
+#ifdef DEBUG_SUPPORT
+            log_os << __FUNCTION__ << "  SV pair support: processed existing alt pairs.\n";
+#endif
+        }
+    }
+
+
+    if (! isAltPairFound)
+    {
+        // for SVs which were assembled without a pair-driven prior hypothesis,
+        // we need to go back to the bam and find any supporting alt read-pairs
+        getSVAltPairSupport(pairOpt, assemblyData, sv, evidence, pairProcList);
+#ifdef DEBUG_SUPPORT
+        log_os << __FUNCTION__ << "  SV pair support: get new alt pairs.\n";
+#endif
+    }
+
+    // count the read pairs supporting the reference allele on each breakend in each sample:
+    //
+    getSVRefPairSupport(pairOpt, sv, evidence, pairProcList);
+
+    // execute bam scanning for all pairs:
+    //
+    processBamProcList(_bamStreams, svId, pairProcList, svSupports);
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorerPairOptions.hh b/src/c++/lib/applications/GenerateSVCandidates/SVScorerPairOptions.hh
new file mode 100644
index 0000000..0e90f70
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorerPairOptions.hh
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+
+
+/// shared options related to read pair support; both thresholds are fixed at construction:
+struct PairOptions
+{
+    explicit
+    PairOptions(const bool isRNA) :
+        minFragSupport(50),
+        minFragProb(! isRNA ? 0.0001f : 0.0f) // RNA mode lowers the probability threshold to zero
+    {}
+
+    /// we're interested in any fragments which cross center pos with at least N bases of support on each side
+    /// (note this definition is certain to overlap the split read definition whenever N is less than the read length)
+    ///
+    /// for reads shorter than this length, the whole read is required...
+    const pos_t minFragSupport;
+
+    const float minFragProb; // getFragProb() counts a fragment as sv support only when its fragment-size prob is >= this value
+};
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorerShared.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorerShared.cpp
new file mode 100644
index 0000000..7aaf72f
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorerShared.cpp
@@ -0,0 +1,44 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SVScorerShared.hh"
+/// record mapq, size and shadow status in \p read and set its anchored flags from the two mapq thresholds
+void
+setReadEvidence(
+    const unsigned minMapQ,
+    const unsigned minTier2MapQ,
+    const unsigned mapq,
+    const unsigned readSize,
+    const bool isShadow,
+    SVFragmentEvidenceRead& read)
+{
+    if (read.isScanned) return; // a read already marked as scanned is left unchanged
+
+    read.isScanned = true;
+    read.mapq = mapq;
+    read.isShadow = isShadow;
+    read.setAnchored(read.mapq >= minMapQ); // thresholds are compared against the value as stored in read.mapq
+    read.setTier2Anchored(read.mapq >= minTier2MapQ);
+    read.size = readSize;
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorerShared.hh b/src/c++/lib/applications/GenerateSVCandidates/SVScorerShared.hh
new file mode 100644
index 0000000..364730a
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorerShared.hh
@@ -0,0 +1,52 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "SVEvidence.hh"
+#include "htsapi/bam_record_util.hh"
+/// set the mapq, size, shadow and anchoring fields of \p read from explicit values (defined in SVScorerShared.cpp)
+void
+setReadEvidence(
+    const unsigned minMapQ,
+    const unsigned minTier2MapQ,
+    const unsigned mapq,
+    const unsigned readSize,
+    const bool isShadow,
+    SVFragmentEvidenceRead& read);
+
+/// convenience overload: takes mapq and read size from \p bamRead and forwards to the explicit-value overload
+inline
+void
+setReadEvidence(
+    const unsigned minMapQ,
+    const unsigned minTier2MapQ,
+    const bam_record& bamRead,
+    const bool isShadow,
+    SVFragmentEvidenceRead& read)
+{
+    const unsigned mapq(bamRead.map_qual());
+    const unsigned readSize(bamRead.read_size());
+    setReadEvidence(minMapQ, minTier2MapQ, mapq, readSize, isShadow, read);
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp
new file mode 100644
index 0000000..29bccc8
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp
@@ -0,0 +1,439 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#include "SVScorer.hh"
+#include "blt_util/seq_util.hh"
+#include "manta/ShadowReadFinder.hh"
+#include "htsapi/SimpleAlignment_bam_util.hh"
+
+#include "boost/scoped_array.hpp"
+
+//#define DEBUG_SVS
+
+//#define DEBUG_SUPPORT
+
+#if defined(DEBUG_SVS) || defined(DEBUG_SUPPORT)
+#include <iostream>
+#include "blt_util/log.hh"
+#endif
+
+/// Fold one read's split-read alignment results at both breakends into the per-read and per-allele evidence
+static
+void
+incrementAlleleEvidence(
+    const SRAlignmentInfo& bp1SR, // alignment of the read at breakend 1
+    const SRAlignmentInfo& bp2SR, // alignment of the read at breakend 2
+    const unsigned readMapQ, // squared and summed into allele.splitReadMapQ when the read counts
+    SVSampleAlleleInfo& allele, // running totals for the allele being scored
+    SVFragmentEvidenceAlleleBreakendPerRead& bp1Support, // per-read record updated from bp1SR
+    SVFragmentEvidenceAlleleBreakendPerRead& bp2Support) // per-read record updated from bp2SR
+{
+    float bp1Evidence(0); // stays 0 unless the bp1 alignment is flagged as evidence
+    float bp2Evidence(0); // stays 0 unless the bp2 alignment is flagged as evidence
+    if (bp1SR.isEvidence)
+    {
+        bp1Evidence = bp1SR.evidence;
+        bp1Support.isSplitSupport = true;
+        bp1Support.splitEvidence = bp1Evidence;
+    }
+
+    if (bp1SR.isTier2Evidence)
+    {
+        bp1Support.isTier2SplitSupport = true;
+    }
+
+    bp1Support.splitLnLhood = bp1SR.alignLnLhood; // copied whether or not the alignment is flagged as evidence
+
+    if (bp2SR.isEvidence)
+    {
+        bp2Evidence = bp2SR.evidence;
+        bp2Support.isSplitSupport = true;
+        bp2Support.splitEvidence = bp2Evidence;
+    }
+
+    if (bp2SR.isTier2Evidence)
+    {
+        bp2Support.isTier2SplitSupport = true;
+    }
+
+    bp2Support.splitLnLhood = bp2SR.alignLnLhood; // copied whether or not the alignment is flagged as evidence
+
+    const float evidence(std::max(bp1Evidence, bp2Evidence)); // the larger breakend value is what the read contributes
+
+    if (bp1SR.isEvidence || bp2SR.isEvidence)
+    {
+        allele.splitReadCount++; // the read is counted once even if both breakends are flagged
+        allele.splitReadEvidence += evidence;
+        allele.splitReadMapQ += readMapQ * readMapQ; // sum of squares; finishRms() turns it into an RMS later
+
+#ifdef DEBUG_SVS
+        log_os << "bp1\n";
+        log_os << bp1SR;
+        log_os << "bp2\n";
+        log_os << bp2SR;
+        log_os << "evidence = " << evidence << "\n";
+        log_os << "accumulated evidence = " << allele.splitReadEvidence << "\n";
+        log_os << "contigCount = " << allele.splitReadCount << "\n\n";
+#endif
+    }
+}
+
+/// Score one read against the alt contig and the reference at both breakends of an SV, at most once per
+/// read: updates the fragment's per-read evidence, the sample allele totals and the supporting-read set
+static
+void
+getReadSplitScore(
+    const bam_record& bamRead,
+    const CallOptionsSharedDeriv& dopt,
+    const SVId& svId,
+    const SVBreakend& bp, // only read in the RNA reference-alignment branch
+    const reference_contig_segment& bpRef, // only read in the RNA reference-alignment branch
+    const bool isBP1, // in the RNA branch, selects which breakend's reference alignment is computed
+    const unsigned flankScoreSize,
+    const SVAlignmentInfo& svAlignInfo,
+    const unsigned minMapQ,
+    const unsigned minTier2MapQ,
+    const bool isRNA,
+    const bool isShadow,
+    const bool isReversedShadow, // only honoured together with isShadow
+    SVEvidence::evidenceTrack_t& sampleEvidence,
+    SVSampleInfo& sample,
+    SupportFragments& svSupportFrags)
+{
+    SVFragmentEvidence& fragment(sampleEvidence[bamRead.qname()]); // evidence record looked up by read name
+
+    const bool isRead1(bamRead.is_first());
+
+    SVFragmentEvidenceAlleleBreakendPerRead& altBp1ReadSupport(fragment.alt.bp1.getRead(isRead1));
+
+#ifdef DEBUG_SVS
+    log_os << __FUNCTION__ << " split scoring read: " << bamRead << "\n";
+#endif
+
+    // in this function we evaluate the hypothesis of both breakends at the same time, the only difference bp1 vs
+    // bp2 makes is where in the bam we look for reads, therefore if we see split evaluation for bp1 or bp2, we can skip this read:
+    if (altBp1ReadSupport.isSplitEvaluated) return;
+
+    SVFragmentEvidenceAlleleBreakendPerRead& refBp1ReadSupport(fragment.ref.bp1.getRead(isRead1));
+    SVFragmentEvidenceAlleleBreakendPerRead& altBp2ReadSupport(fragment.alt.bp2.getRead(isRead1));
+    SVFragmentEvidenceAlleleBreakendPerRead& refBp2ReadSupport(fragment.ref.bp2.getRead(isRead1));
+
+    altBp1ReadSupport.isSplitEvaluated = true;
+    refBp1ReadSupport.isSplitEvaluated = true;
+    altBp2ReadSupport.isSplitEvaluated = true;
+    refBp2ReadSupport.isSplitEvaluated = true;
+
+    std::string readSeq = bamRead.get_bam_read().get_string();
+    const uint8_t* qual(bamRead.qual());
+
+    boost::scoped_array<uint8_t> qualcpy; // owns the reversed quality copy, if one is needed
+    if (isShadow && isReversedShadow)
+    {
+        reverseCompStr(readSeq);
+        // reverse the qualities as well so they stay in step with the reverse-complemented sequence
+        qualcpy.reset(new uint8_t[readSeq.size()]);
+        std::reverse_copy(qual,qual+readSeq.size(),qualcpy.get());
+        qual = qualcpy.get();
+    }
+
+    const unsigned readMapQ = bamRead.map_qual();
+    // pass this read's mapq and size on to setReadEvidence for the matching read slot of the fragment
+    setReadEvidence(minMapQ, minTier2MapQ, bamRead, isShadow, fragment.getRead(isRead1));
+
+    // align the read to the somatic contig
+    {
+        SRAlignmentInfo bp1ContigSR;
+        SRAlignmentInfo bp2ContigSR;
+        splitReadAligner(flankScoreSize, readSeq, dopt.altQ, qual, svAlignInfo.bp1ContigSeq(), svAlignInfo.bp1ContigOffset, bp1ContigSR);
+        splitReadAligner(flankScoreSize, readSeq, dopt.altQ, qual, svAlignInfo.bp2ContigSeq(), svAlignInfo.bp2ContigOffset, bp2ContigSR);
+
+        incrementAlleleEvidence(bp1ContigSR, bp2ContigSR, readMapQ, sample.alt, altBp1ReadSupport, altBp2ReadSupport);
+
+        if (fragment.isAltSplitReadSupport(bamRead.is_first())) // register the read for evidence-BAM output
+        {
+            SupportFragment& supportFrag(svSupportFrags.getSupportFragment(bamRead));
+            supportFrag.addSplitSupport(bamRead.is_first(), svId.localId);
+#ifdef DEBUG_SUPPORT
+            log_os << __FUNCTION__ << "  Adding read support (split): "
+                   << bamRead.qname();
+            if (bamRead.is_first())
+                log_os << "\tR1";
+            else
+                log_os << "\tR2";
+            log_os << "\n" << supportFrag;
+#endif
+        }
+    }
+
+    // align the read to reference regions
+    {
+        SRAlignmentInfo bp1RefSR;
+        SRAlignmentInfo bp2RefSR;
+        if (!isRNA)
+        {
+            splitReadAligner(flankScoreSize, readSeq, dopt.refQ, qual, svAlignInfo.bp1ReferenceSeq(), svAlignInfo.bp1RefOffset, bp1RefSR);
+            splitReadAligner(flankScoreSize, readSeq, dopt.refQ, qual, svAlignInfo.bp2ReferenceSeq(), svAlignInfo.bp2RefOffset, bp2RefSR);
+        }
+        else // RNA: only the breakend selected by isBP1 is aligned; the other SRAlignmentInfo stays default-constructed
+        {
+            if (isBP1)
+                getRefAlignment(bamRead, bpRef, bp.interval.range, dopt.refQ, bp1RefSR);
+            else
+                getRefAlignment(bamRead, bpRef, bp.interval.range, dopt.refQ, bp2RefSR);
+        }
+#ifdef DEBUG_SVS
+        log_os << "\t reference align bp1: " << bp1RefSR << "\n";
+        log_os << "\t reference align bp2: " << bp2RefSR << "\n";
+#endif
+        // scoring
+        incrementAlleleEvidence(bp1RefSR, bp2RefSR, readMapQ, sample.ref, refBp1ReadSupport, refBp2ReadSupport);
+    }
+}
+
+/// Scan the reads around one breakend of an SV and split-score each read that passes the filters below
+/// (the shadow-read pass at the end is compiled in but switched off by isIncludeShadowReads)
+static
+void
+scoreSplitReads(
+    const CallOptionsSharedDeriv& dopt,
+    const unsigned flankScoreSize,
+    const SVId& svId,
+    const SVBreakend& bp,
+    const SVAlignmentInfo& svAlignInfo,
+    const reference_contig_segment& bpRef,
+    const bool isBP1,
+    const unsigned minMapQ,
+    const unsigned minTier2MapQ,
+    const int bamShadowRange, // only used by the (disabled) shadow-read pass
+    const unsigned shadowMinMapq, // only used by the (disabled) shadow-read pass
+    const bool isRNA,
+    SVEvidence::evidenceTrack_t& sampleEvidence,
+    bam_streamer& readStream,
+    SVSampleInfo& sample,
+    SupportFragments& svSupportFrags)
+{
+    static const int extendedSearchRange(200); // Window to look for alignments that may (if unclipped) overlap the breakpoint
+    // extract reads overlapping the break point
+    // We are not looking for remote reads, (semialigned-) reads mapping near this breakpoint, but not across it
+    // or any other kind of additional reads used for assembly.
+    readStream.set_new_region(bp.interval.tid,
+                              std::max(0, bp.interval.range.begin_pos() - extendedSearchRange),
+                              bp.interval.range.end_pos() + extendedSearchRange);
+    while (readStream.next())
+    {
+        const bam_record& bamRead(*(readStream.get_record_ptr()));
+
+        if (SVLocusScanner::isReadFilteredCore(bamRead)) continue;
+        if (bamRead.is_unmapped()) continue;
+
+        /// TODO: remove this filter?
+        /// The supplemental alignment is likely to be hard-clipped
+        if (bamRead.isNonStrictSupplement()) continue;
+
+        // Skip reads whose reference range (from matchifyEdgeSoftClipRefRange) does not intersect this breakpoint's interval.
+        const known_pos_range2 bamRange(matchifyEdgeSoftClipRefRange(getAlignment(bamRead)));
+        if (!bamRange.is_range_intersect(bp.interval.range)) continue;
+
+        static const bool isShadow(false);
+        static const bool isReversedShadow(false);
+
+        //const uint8_t mapq(bamRead.map_qual());
+        getReadSplitScore(bamRead, dopt, svId, bp, bpRef, isBP1,
+                          flankScoreSize, svAlignInfo, minMapQ, minTier2MapQ,
+                          isRNA, isShadow, isReversedShadow,
+                          sampleEvidence, sample, svSupportFrags);
+    }
+
+    static const bool isIncludeShadowReads(false); // hard-coded off: the block below never runs
+
+    // search for appropriate shadow reads to add to the split read pool
+    //
+    if (isIncludeShadowReads)
+    {
+        // depending on breakend type we may only be looking for candidates in one direction:
+        bool isSearchForLeftOpen(true);
+        bool isSearchForRightOpen(true);
+        known_pos_range2 shadowRange;
+        if (bp.state == SVBreakendState::RIGHT_OPEN)
+        {
+            isSearchForLeftOpen = false;
+
+            shadowRange.set_begin_pos(std::max(0,bp.interval.range.begin_pos()-bamShadowRange));
+            shadowRange.set_end_pos(bp.interval.range.begin_pos());
+        }
+        else if (bp.state == SVBreakendState::LEFT_OPEN)
+        {
+            isSearchForRightOpen = false;
+
+            shadowRange.set_begin_pos(bp.interval.range.end_pos());
+            shadowRange.set_end_pos(bp.interval.range.end_pos()+bamShadowRange);
+        }
+        else
+        {
+            assert(false && "Invalid bp state"); // NOTE(review): with asserts compiled out, shadowRange keeps its default value here
+        }
+
+        readStream.set_new_region(bp.interval.tid, shadowRange.begin_pos(), shadowRange.end_pos());
+
+        ShadowReadFinder shadow(shadowMinMapq,isSearchForLeftOpen,isSearchForRightOpen);
+
+        while (readStream.next())
+        {
+            const bam_record& bamRead(*(readStream.get_record_ptr()));
+
+            if (SVLocusScanner::isReadFilteredCore(bamRead)) continue;
+            if (! shadow.check(bamRead)) continue;
+
+            static const bool isShadow(true);
+            const bool isReversedShadow(bamRead.is_mate_fwd_strand()); // getReadSplitScore reverse-complements the read when this is true
+
+            //const uint8_t mapq(shadow.getMateMapq());
+            getReadSplitScore(bamRead, dopt, svId, bp, bpRef, isBP1,
+                              flankScoreSize, svAlignInfo, minMapQ, minTier2MapQ,
+                              isRNA, isShadow, isReversedShadow,
+                              sampleEvidence, sample, svSupportFrags);
+        }
+    }
+}
+
+
+
+/// return rms given sum of squares: sqrt(sumSqr/count), or 0 when count is 0
+static
+float
+finishRms(
+    const float sumSqr,
+    const unsigned count)
+{
+    if (count == 0) return 0.; // nothing accumulated: avoid dividing by zero
+    return std::sqrt(sumSqr / static_cast<float>(count));
+}
+
+
+/// replace the sum of squared MAPQ accumulated in sai.splitReadMapQ with its RMS over sai.splitReadCount
+static
+void
+finishRms(
+    SVSampleAlleleInfo& sai)
+{
+    sai.splitReadMapQ = finishRms(sai.splitReadMapQ, sai.splitReadCount);
+}
+
+
+
+/// make final split read computations after bam scanning is finished (applied to both alleles of the sample):
+static
+void
+finishSampleSRData(
+    SVSampleInfo& sample)
+{
+    // finish rms mapq: until this call splitReadMapQ holds a sum of squares, afterwards the RMS
+    finishRms(sample.alt);
+    finishRms(sample.ref);
+}
+
+/// Accumulate split-read support for a precise SV from every sample's BAM, scanning around bp1 and then bp2.
+/// Results are written into baseInfo.samples[], evidence and svSupports; imprecise SVs are left untouched.
+void
+SVScorer::
+getSVSplitReadSupport(
+    const SVCandidateAssemblyData& assemblyData,
+    const SVCandidate& sv,
+    const SVId& svId,
+    SVScoreInfo& baseInfo,
+    SVEvidence& evidence,
+    SupportSamples& svSupports)
+{
+    // apply the split-read scoring only when:
+    // 1) the SV is precise, i.e. has successfully aligned contigs;
+    // 2) the values of max depth are reasonable (otherwise, the read map may blow out). (filter is run externally)
+
+    if (sv.isImprecise()) return;
+
+    // Get Data on standard read pairs crossing the two breakends,
+
+    // extract SV alignment info for split read evidence
+    const SVAlignmentInfo SVAlignInfo(sv, assemblyData);
+
+    /// how many bases from the end of the microhomology range are part of the split read score?
+    static const unsigned flankScoreSize(50);
+
+    // only consider a split alignment with sufficient flanking sequence:
+    if (! SVAlignInfo.isMinBpEdge(100)) return;
+
+#ifdef DEBUG_SVS
+    log_os << __FUNCTION__ << sv << '\n';
+    log_os << __FUNCTION__ << SVAlignInfo << '\n';
+#endif
+
+    const unsigned minMapQ(_readScanner.getMinMapQ());
+    const unsigned minTier2MapQ(_readScanner.getMinTier2MapQ());
+
+    const unsigned bamCount(_bamStreams.size());
+    for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex) // one pass per input BAM; samples are indexed the same way
+    {
+        SVSampleInfo& sample(baseInfo.samples[bamIndex]);
+        bam_streamer& bamStream(*_bamStreams[bamIndex]);
+
+        SVEvidence::evidenceTrack_t& sampleEvidence(evidence.getSampleEvidence(bamIndex));
+        SupportFragments& svSupportFrags(svSupports.getSupportFragments(bamIndex));
+
+        const int bamShadowRange(_readScanner.getShadowSearchRange(bamIndex));
+
+        // scoring split reads overlapping bp1
+#ifdef DEBUG_SVS
+        log_os << __FUNCTION__ << " scoring BP1\n";
+#endif
+        scoreSplitReads(_callDopt, flankScoreSize, svId, sv.bp1,
+                        SVAlignInfo, assemblyData.bp1ref, true, minMapQ, minTier2MapQ,
+                        bamShadowRange, _scanOpt.minSingletonMapqCandidates, _isRNA,
+                        sampleEvidence, bamStream, sample, svSupportFrags);
+        // scoring split reads overlapping bp2 (reads already scored in the bp1 pass are skipped via isSplitEvaluated)
+#ifdef DEBUG_SVS
+        log_os << __FUNCTION__ << " scoring BP2\n";
+#endif
+        scoreSplitReads(_callDopt, flankScoreSize, svId, sv.bp2,
+                        SVAlignInfo, assemblyData.bp2ref, false, minMapQ, minTier2MapQ,
+                        bamShadowRange, _scanOpt.minSingletonMapqCandidates, _isRNA,
+                        sampleEvidence, bamStream, sample, svSupportFrags);
+
+        finishSampleSRData(sample); // turn the summed squared MAPQs of this sample into RMS values
+    }
+
+#ifdef DEBUG_SVS
+    log_os << "tumor contig SP count: " << baseInfo.tumor.alt.splitReadCount << "\n";
+    log_os << "tumor contig SP evidence: " << baseInfo.tumor.alt.splitReadEvidence << "\n";
+    log_os << "tumor contig SP_mapQ: " << baseInfo.tumor.alt.splitReadMapQ << "\n";
+    log_os << "normal contig SP count: " << baseInfo.normal.alt.splitReadCount << "\n";
+    log_os << "normal contig SP evidence: " << baseInfo.normal.alt.splitReadEvidence << "\n";
+    log_os << "normal contig SP_mapQ: " << baseInfo.normal.alt.splitReadMapQ << "\n";
+
+    log_os << "tumor ref SP count: " << baseInfo.tumor.ref.splitReadCount << "\n";
+    log_os << "tumor ref SP evidence: " << baseInfo.tumor.ref.splitReadEvidence << "\n";
+    log_os << "tumor ref SP_mapQ: " << baseInfo.tumor.ref.splitReadMapQ << "\n";
+    log_os << "normal ref SP count: " << baseInfo.normal.ref.splitReadCount << "\n";
+    log_os << "normal ref SP evidence: " << baseInfo.normal.ref.splitReadEvidence << "\n";
+    log_os << "normal ref SP_mapQ: " << baseInfo.normal.ref.splitReadMapQ << "\n";
+#endif
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVSupports.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVSupports.cpp
new file mode 100644
index 0000000..cb55f27
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVSupports.cpp
@@ -0,0 +1,201 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#include "SVSupports.hh"
+
+#include <iostream>
+
+//#define DEBUG_SUPPORT
+
+#ifdef DEBUG_SUPPORT
+#include "blt_util/log.hh"
+#endif
+
+/// Print "tid:pos", a tab, then the id of every SV the read supports, each followed by a comma
+std::ostream&
+operator<<( std::ostream& os, const SupportRead& suppRd)
+{
+    os << suppRd.tid << ":" << suppRd.pos << "\t";
+    for (const auto& sv : suppRd.SVs)
+    {
+        os << sv.first << ","; // only the SV id (map key) is printed, not its support types
+    }
+
+    return os;
+}
+
+/// Print read1 and then read2 of the fragment, each followed by a newline
+std::ostream&
+operator<<( std::ostream& os, const SupportFragment& suppFrg)
+{
+    os << suppFrg.read1 << "\n"
+       << suppFrg.read2 << "\n";
+    return os;
+}
+
+/// Print every supporting fragment as a "qname =<name>" line followed by the fragment's two reads
+std::ostream&
+operator<<( std::ostream& os, const SupportFragments& suppFrgs)
+{
+    for (const auto& frg : suppFrgs.supportFrags)
+    {
+        os << "qname =" << frg.first << "\n"
+           << frg.second;
+    }
+
+    return os;
+}
+
+/// Print each sample's supporting fragments under a "sample index = <i>" line, in index order
+std::ostream&
+operator<<( std::ostream& os, const SupportSamples& suppSmps)
+{
+    unsigned size(suppSmps.supportSamples.size());
+    for (unsigned i=0; i<size; i++)
+    {
+        os << "sample index = " << i << "\n"
+           << suppSmps.supportSamples[i];
+    }
+
+    return os;
+}
+
+/// Write to \p bamDumper each record in \p interval that matches a supporting read, tagged with the SVs it supports
+void
+processBamRecords(
+    bam_streamer& origBamStream,
+    const GenomeInterval& interval,
+    const support_fragments_t& supportFrags,
+    bam_dumper& bamDumper)
+{
+#ifdef DEBUG_SUPPORT
+    log_os << __FUNCTION__ << "  target interval: "
+           << interval << "\n";
+#endif
+
+    origBamStream.set_new_region(interval.tid, interval.range.begin_pos(), interval.range.end_pos());
+    while (origBamStream.next())
+    {
+        const bam_record* origBamRec(origBamStream.get_record_ptr());
+        bam_record bamRec(*origBamRec); // work on a copy: an aux field is appended to it below
+
+        const std::string qname(bamRec.qname());
+        support_fragments_t::const_iterator suppFragsIter(supportFrags.find(qname));
+        if (suppFragsIter != supportFrags.end())
+        {
+            const SupportFragment& supportFrag(suppFragsIter->second);
+            const bool isR1Matched(bamRec.is_first() && // same read name is not enough: read number, tid and pos must all agree
+                                   (bamRec.target_id() == supportFrag.read1.tid) &&
+                                   (bamRec.pos() == supportFrag.read1.pos));
+            const bool isR2Matched((!bamRec.is_first()) &&
+                                   (bamRec.target_id() == supportFrag.read2.tid) &&
+                                   (bamRec.pos() == supportFrag.read2.pos));
+
+            if (isR1Matched || isR2Matched)
+            {
+                const SupportRead& read(isR1Matched ? supportFrag.read1 : supportFrag.read2);
+#ifdef DEBUG_SUPPORT
+                log_os << __FUNCTION__ << "  matched supporting read: "
+                       << read << "\n";
+#endif
+
+                bam1_t& br(*(bamRec.get_data()));
+                // add new customized field of SV IDs that the read supports
+                bool isFirst(true);
+                std::string svStr; // built as "<svId>|<type>|<type>,<svId>|<type>..."
+                for (const auto& sv : read.SVs)
+                {
+                    if (! isFirst) svStr.append(",");
+                    svStr.append(sv.first);
+                    for (const auto& svType : sv.second)
+                    {
+                        svStr.append('|' + svType);
+                    }
+                    if (isFirst) isFirst = false;
+                }
+                // appended as a 'Z' aux field under tag "ZM"; the length passed includes the terminating NUL
+                static const char svtag[] = {'Z','M'};
+                bam_aux_append(&br,svtag,'Z',(svStr.size()+1),
+                               (uint8_t*)(svStr.c_str()));
+
+                // Update bam record bin value
+                bam_update_bin(br);
+                // write to bam
+                bamDumper.put_record(&br);
+            }
+        }
+    }
+}
+
+/// Write to the evidence BAM every record of the original BAM that matches a read in \p svSupportFrags.
+/// Support reads are sorted and merged into query intervals so that nearby reads share one region scan.
+void
+writeSupportBam(bam_streamer_ptr origBamStreamPtr,
+                const SupportFragments& svSupportFrags,
+                bam_dumper_ptr supportBamDumperPtr)
+{
+    std::vector<SupportRead> supportReads;
+    const support_fragments_t& supportFrags(svSupportFrags.supportFrags);
+    for (const auto& frg : supportFrags)
+    {
+        supportReads.push_back(frg.second.read1);
+        supportReads.push_back(frg.second.read2);
+    }
+    // sort all the reads w.r.t. genomic positions
+    std::sort(supportReads.begin(), supportReads.end());
+
+    // generate a set of intervals containing overlapping reads
+    const int readDistance(100);
+    int lastTid = -1;
+    int lastPos = -1;
+    std::vector<GenomeInterval> intervals;
+    for (const auto& suppRd : supportReads)
+    {
+        // A read whose location was never set keeps tid == -1 and sorts first; it has no region to
+        // query, and matching the initial lastTid it would call back() on the still-empty vector.
+        if (suppRd.tid < 0) continue;
+        if  ((lastTid == suppRd.tid) && (lastPos + readDistance >= suppRd.pos))
+        {
+            intervals.back().range.set_end_pos(suppRd.pos);
+        }
+        else
+        {
+            intervals.push_back(GenomeInterval(suppRd.tid,suppRd.pos-1,suppRd.pos));
+        }
+
+        lastTid=suppRd.tid;
+        lastPos=suppRd.pos;
+    }
+
+    bam_streamer& origBamStream(*origBamStreamPtr);
+    bam_dumper& supportBamDumper(*supportBamDumperPtr);
+    for (const auto& interval : intervals)
+    {
+        processBamRecords(origBamStream, interval, supportFrags, supportBamDumper);
+    }
+}
+
+
+
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVSupports.hh b/src/c++/lib/applications/GenerateSVCandidates/SVSupports.hh
new file mode 100644
index 0000000..50dd383
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SVSupports.hh
@@ -0,0 +1,219 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#pragma once
+
+#include <set>
+#include <map>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <memory>
+
+#include "manta/SVCandidateSetData.hh"
+#include "htsapi/bam_streamer.hh"
+#include "htsapi/bam_dumper.hh"
+#include "htsapi/bam_record_util.hh"
+
+
+typedef std::shared_ptr<bam_record> bam_record_ptr;
+typedef std::shared_ptr<bam_streamer> bam_streamer_ptr;
+typedef std::shared_ptr<bam_dumper> bam_dumper_ptr;
+
+
+/// Records a single read that supports one or more SVs for evidence-BAM output
+struct SupportRead
+{
+    typedef std::map<std::string, std::set<std::string>> SV_supportType_t; // SV id -> set of support-type labels
+    /// Record \p supportType for \p svId, creating the SV's entry on first use
+    void addNewSV(
+        const std::string& svId, const std::string& supportType)
+    {
+        if (SVs.find(svId) == SVs.end())
+        {
+            std::set<std::string> supportTypeSet;
+            SVs[svId] = supportTypeSet;
+        }
+
+        SVs[svId].insert(supportType); // a set, so repeating the same label has no effect
+    }
+    /// Order reads by tid, then by pos
+    bool
+    operator<(
+        const SupportRead& rhs) const
+    {
+        if (tid < rhs.tid) return true;
+        if (tid == rhs.tid)
+        {
+            return (pos < rhs.pos);
+        }
+        return false;
+    }
+
+
+    // tid/pos keep these defaults until assigned, e.g. by SupportFragment::setReads
+    int tid = -1;
+    int pos = 0;
+    SV_supportType_t SVs;
+};
+
+std::ostream&
+operator<<( std::ostream& os, const SupportRead& suppRd);
+
+
+/// Records a single fragment (read1 & read2)
+/// that supports one or more SVs for evidence-BAM output,
+/// indicating the evidence type
+struct SupportFragment
+{
+    /// Set tid/pos of both reads from \p bamRead and its mate fields; a read flagged neither first nor second changes nothing
+    void setReads(
+        const bam_record& bamRead)
+    {
+        if (bamRead.is_first())
+        {
+            read1.tid = bamRead.target_id();
+            read1.pos = bamRead.pos();
+            read2.tid = bamRead.mate_target_id();
+            read2.pos = bamRead.mate_pos();
+        }
+        else if (bamRead.is_second())
+        {
+            read1.tid = bamRead.mate_target_id();
+            read1.pos = bamRead.mate_pos();
+            read2.tid = bamRead.target_id();
+            read2.pos = bamRead.pos();
+        }
+    }
+    /// Label both reads of the fragment with "PR" for \p svID
+    void addSpanningSupport(
+        const std::string& svID)
+    {
+        read1.addNewSV(svID, "PR");
+        read2.addNewSV(svID, "PR");
+    }
+    /// Label the read selected by \p isRead1 with "SR" for \p svID and the other read of the fragment with "SRM"
+    void addSplitSupport(
+        const bool isRead1,
+        const std::string& svID)
+    {
+        if (isRead1)
+        {
+            read1.addNewSV(svID, "SR");
+            read2.addNewSV(svID, "SRM");
+        }
+        else
+        {
+            read2.addNewSV(svID, "SR");
+            read1.addNewSV(svID, "SRM");
+        }
+    }
+
+
+    SupportRead read1;
+    SupportRead read2;
+};
+
+std::ostream&
+operator<<( std::ostream& os, const SupportFragment& suppFrg);
+
+typedef std::map<std::string, SupportFragment> support_fragments_t;
+
+
+/// Records all supporting fragments
+/// that support one or more SVs for evidence-BAM output (a map from read name to SupportFragment)
+struct SupportFragments
+{
+    SupportFragment& getSupportFragment( // returns the entry for bamRead's qname, adding it if absent
+        const bam_record& bamRead)
+    {
+        const std::string qname(bamRead.qname());
+
+        // create a new entry in the map (read locations come from the first record seen for this qname)
+        if (supportFrags.find(qname) == supportFrags.end())
+        {
+            SupportFragment newFrag;
+            newFrag.setReads(bamRead);
+            supportFrags[qname] = newFrag;
+        }
+
+        return supportFrags[qname];
+    }
+
+    /// Same lookup for a candidate-set fragment, keyed on read1's record if it is set, otherwise on read2's
+    SupportFragment& getSupportFragment(
+        const SVCandidateSetSequenceFragment& seqFrag)
+    {
+        // Tentatively add an assertion
+        // \TODO add the logic if only supplementary (NOT primary) reads being set
+        assert(seqFrag.read1.isSet() || seqFrag.read2.isSet());
+
+        const SVCandidateSetRead& read(seqFrag.read1.isSet() ? seqFrag.read1 : seqFrag.read2);
+
+        const std::string qname(read.bamrec.qname());
+
+        // create a new entry in the map
+        if (supportFrags.find(qname) == supportFrags.end())
+        {
+            SupportFragment newFrag;
+            newFrag.setReads(read.bamrec);
+            supportFrags[qname] = newFrag;
+        }
+
+        return supportFrags[qname];
+    }
+
+    support_fragments_t supportFrags; // keyed by read name (qname)
+};
+
+std::ostream&
+operator<<( std::ostream& os, const SupportFragments& suppFrgs);
+
+/// Holds one SupportFragments collection per sample, addressed by sample index
+struct SupportSamples
+{
+    SupportFragments& getSupportFragments(
+        const unsigned index)
+    {
+        assert(index < supportSamples.size()); // the vector is not grown here; callers must have sized it already
+        return supportSamples[index];
+    }
+
+    std::vector<SupportFragments> supportSamples;
+};
+
+std::ostream&
+operator<<( std::ostream& os, const SupportSamples& suppSmps);
+
+void
+processBamRecords(bam_streamer& origBamStream,
+                  const GenomeInterval& interval,
+                  const support_fragments_t& supportFrags,
+                  bam_dumper& bamDumper);
+
+void
+writeSupportBam(bam_streamer_ptr origBamStream,
+                const SupportFragments& svSupportFrags,
+                bam_dumper_ptr supportBamDumper);
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SplitReadAlignment.cpp b/src/c++/lib/applications/GenerateSVCandidates/SplitReadAlignment.cpp
new file mode 100644
index 0000000..02f9b50
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SplitReadAlignment.cpp
@@ -0,0 +1,399 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+/// \author Felix Schlesinger
+///
+
+#include "SplitReadAlignment.hh"
+#include "blt_util/blt_types.hh"
+#include "blt_util/log.hh"
+#include "blt_util/seq_printer.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/SimpleAlignment_bam_util.hh"
+
+#include <cassert>
+#include <cmath>
+
+#include <iostream>
+
+
+//#define DEBUG_SRA
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const SRAlignmentInfo& info) // newline-terminated, single-line dump of the alignment fields (alignPos is not printed)
+{
+    os << "leftSize=" << info.leftSize << " homSize=" << info.homSize << " rightSize=" << info.rightSize
+       << " leftMismatches=" << info.leftMismatches << " homMismatches=" << info.homMismatches << " rightMismatches=" << info.rightMismatches
+       << " alignScore=" << info.alignScore
+       << " isEvidence: " << info.isEvidence
+       << " isT2Evidence: " << info.isTier2Evidence
+       << " evidence: " << info.evidence
+       << " alignLnLhood: "  << info.alignLnLhood
+       << '\n';
+    return os;
+}
+
+
+
+/// \return ln-likelihood of querySeq placed at targetStartOffset in targetSeq, summed only over target offsets in (scoreRange.begin_pos(), scoreRange.end_pos()]
+static
+float
+getLnLhood(
+    const std::string& querySeq,
+    const qscore_snp& qualConvert,
+    const uint8_t* queryQual, // read with the same index as querySeq
+    const std::string& targetSeq,
+    const pos_t targetStartOffset, // offset in targetSeq where querySeq[0] is placed
+    const known_pos_range2& scoreRange,
+    const bool isBest, // when true, bestLnLhood is valid and enables the early exit below
+    const float bestLnLhood)
+{
+    static const float ln_one_third(std::log(1/3.f)); // a miscalled base is split evenly over the 3 other bases
+
+    const unsigned querySize(querySeq.size());
+
+    assert((targetStartOffset+querySize) <= targetSeq.size());
+
+    float lnLhood(0);
+    for (unsigned i(0); i<querySize; i++)
+    {
+        // put a lower-bound on quality values:
+        const int baseQual(std::max(2,static_cast<int>(queryQual[i])));
+
+        if ((targetStartOffset+static_cast<pos_t>(i)) > scoreRange.end_pos()) break; // past the scored window: nothing further contributes
+        if ((targetStartOffset+static_cast<pos_t>(i)) <= scoreRange.begin_pos()) continue; // not yet inside the scored window
+
+        const char targetBase(targetSeq[targetStartOffset+i]);
+
+        if ((querySeq[i] != targetBase) || (querySeq[i] == 'N')) // an 'N' in the query is never scored as a match
+        {
+            if ((querySeq[i] == 'N') || (targetBase == 'N'))
+            {
+                static const float lnRandomBase(-std::log(4.f)); // ln(1/4): with an 'N' on either side, score as a uniform draw over 4 bases
+                lnLhood += lnRandomBase;
+            }
+            else
+            {
+                lnLhood += qualConvert.qphred_to_ln_error_prob(baseQual) + ln_one_third;
+            }
+        }
+        else
+        {
+            lnLhood += qualConvert.qphred_to_ln_comp_error_prob(baseQual);
+        }
+
+        if (isBest && (lnLhood < bestLnLhood)) break; // already below the best so far: stop and return the partial sum
+    }
+
+    return lnLhood;
+}
+
+
+
+static
+void
+calculateAlignScore( // count mismatches per segment (left flank / homology / right flank) and derive alignScore
+    const std::string& querySeq,
+    const std::string& targetSeq,
+    const unsigned bestPos, // offset in targetSeq where querySeq[0] is aligned
+    SRAlignmentInfo& alignment) // in: leftSize, homSize; out: the three mismatch counters and alignScore
+{
+    const unsigned querySize = querySeq.size();
+    alignment.leftMismatches=0;
+    alignment.homMismatches=0;
+    alignment.rightMismatches=0;
+
+    assert(bestPos+querySize <= targetSeq.size());
+
+    for (unsigned i(0); i<querySize; i++)
+    {
+        if ((querySeq[i] != targetSeq[bestPos+i]) || (querySeq[i] == 'N')) // an 'N' in the query always counts as a mismatch
+        {
+            if (i<alignment.leftSize) // left flank is query indices [0, leftSize)
+            {
+                alignment.leftMismatches += 1;
+            }
+            else if (i < (alignment.leftSize+alignment.homSize)) // homology is [leftSize, leftSize+homSize)
+            {
+                alignment.homMismatches += 1;
+            }
+            else // right flank is everything from leftSize+homSize onward
+            {
+                alignment.rightMismatches += 1;
+            }
+        }
+    }
+
+    alignment.alignScore = querySize - (alignment.leftMismatches+ alignment.homMismatches+ alignment.rightMismatches); // number of matching bases
+}
+
+
+
+static
+bool
+isEvidenceCheck( // true when the alignment passes the flank-size, per-flank mismatch and overall score filters
+    const SRAlignmentInfo& alignment,
+    const unsigned minFlankSize) // minimum number of bases required on each flank
+{
+    if (alignment.leftSize < minFlankSize) return false;
+    if (alignment.rightSize < minFlankSize) return false;
+
+    if ((alignment.leftMismatches/(float)alignment.leftSize) >= 0.25) return false; // reject when >= 25% of the left flank mismatches
+    if ((alignment.rightMismatches/(float)alignment.rightSize) >= 0.25) return false; // same limit for the right flank
+
+    const float size(static_cast<float>(alignment.leftSize+alignment.rightSize)); // note: homSize is not part of the denominator
+    if ((alignment.alignScore/size) < 0.9) return false; // alignScore relative to the combined flank length must reach 0.9
+
+    return true;
+}
+
+
+
+static
+void
+setEvidence( // fill isEvidence, isTier2Evidence and evidence from the sizes/mismatches already stored in alignment
+    SRAlignmentInfo& alignment)
+{
+    //
+    // filters for a read being counted as evidence
+    //
+
+    // adding new flank size threshold -- this might have to be changed based on sv size:
+    static const unsigned minFlankSize(16);
+    static const unsigned minFlankSizeTier2(8);
+    alignment.isEvidence = isEvidenceCheck(alignment,minFlankSize);
+    alignment.isTier2Evidence = isEvidenceCheck(alignment,minFlankSizeTier2);
+
+    alignment.evidence = 0;
+    if (! (alignment.isEvidence || alignment.isTier2Evidence)) return; // neither tier passed: evidence stays 0
+
+    const float size(static_cast<float>(alignment.leftSize+alignment.rightSize));
+    alignment.evidence = 2 * std::min(alignment.leftSize, alignment.rightSize) / (size); // 1.0 for equal flanks, smaller as the flanks become lopsided
+}
+
+
+
+void
+getRefAlignment( // score the read's existing alignment against the breakend range bpPos
+    const bam_record& bamRead,
+    const reference_contig_segment& bp1ref,
+    const known_pos_range2& bpPos,
+    const qscore_snp& qualConvert,
+    SRAlignmentInfo& alignment) // sizes, mismatches and alignLnLhood are accumulated (+=/++), so pass a freshly initialized object
+{
+    using namespace ALIGNPATH;
+    const SimpleAlignment align(getAlignment(bamRead));
+    const std::string qrySeq(bamRead.get_bam_read().get_string());
+    const int refLength(apath_ref_length(align.path));
+    std::string bp1Ref;
+    bp1ref.get_substring(align.pos, refLength, bp1Ref); // reference bases spanned by the alignment
+    const uint8_t* qual(bamRead.qual());
+#ifdef DEBUG_SRA
+    log_os << __FUNCTION__ << bamRead << '\n';
+    log_os << "\t" << refLength << " " << qrySeq << '\n';
+    log_os << "\t" << bp1Ref.substr(0,10) << '\n';
+#endif
+
+    auto queryIndex(qrySeq.begin()); // cursor into the read sequence
+    auto refIndex(bp1Ref.begin()); // cursor into the extracted reference substring
+    for (const path_segment& seg : align.path)
+    {
+        if (is_segment_align_match(seg.type))
+        {
+            for (unsigned i=0; i < seg.length; i++)
+            {
+                int refPos(align.pos + refIndex - bp1Ref.begin()); // position of the current reference base, same coordinates as align.pos
+                bool isSeqMatch(false);
+                if ((*queryIndex == 'N') || (*refIndex == 'N'))
+                {
+                    static const float lnRandomBase(-std::log(4.f)); // ln(1/4): with an 'N' on either side, score as a uniform draw over 4 bases
+                    alignment.alignLnLhood += lnRandomBase;
+                }
+                else
+                {
+                    const int baseQual(std::max(2, static_cast<int>(qual[queryIndex - qrySeq.begin()]))); // quality of the current read base: index by read offset, not by the segment-local counter i
+                    if ((*queryIndex) == (*refIndex))
+                    {
+                        isSeqMatch = true;
+                        alignment.alignLnLhood += qualConvert.qphred_to_ln_comp_error_prob(baseQual);
+                    }
+                    else
+                    {
+                        static const float ln_one_third(std::log(1 / 3.f)); // a miscalled base is split evenly over the 3 other bases
+                        alignment.alignLnLhood += qualConvert.qphred_to_ln_error_prob(baseQual) + ln_one_third;
+                    }
+                }
+
+                if (refPos <= bpPos.begin_pos()) // left flank
+                {
+                    alignment.leftSize++;
+                    if (!isSeqMatch) alignment.leftMismatches++;
+                }
+                if ((refPos > bpPos.begin_pos()) && (refPos < bpPos.end_pos())) // strictly inside the breakend range
+                {
+                    alignment.homSize++;
+                    if (!isSeqMatch) alignment.homMismatches++;
+                }
+                if (refPos >= bpPos.end_pos()) // right flank; NOTE(review): if begin_pos()==end_pos() that base is counted in both flanks -- confirm intended
+                {
+                    alignment.rightSize++;
+                    if (!isSeqMatch) alignment.rightMismatches++;
+                }
+                queryIndex++;
+                refIndex++;
+            }
+        }
+        else
+        {
+            if (is_segment_type_read_length(seg.type)) std::advance(queryIndex, seg.length); // skip read-only segments
+            if (is_segment_type_ref_length(seg.type)) std::advance(refIndex, seg.length); // skip reference-only segments
+        }
+    }
+    alignment.alignPos = align.pos - bp1ref.get_offset(); // alignment start relative to the reference segment
+    alignment.alignScore = apath_matched_length(align.path) - alignment.leftMismatches -
+                           alignment.homMismatches - alignment.rightMismatches;
+    setEvidence(alignment);
+}
+
+
+
+void
+splitReadAligner( // ungapped scan of querySeq across targetSeq; fills alignment with the most likely placement
+    const unsigned flankScoreSize,
+    const std::string& querySeq,
+    const qscore_snp& qualConvert,
+    const uint8_t* queryQual,
+    const std::string& targetSeq,
+    const known_pos_range2& targetBpOffsetRange,
+    SRAlignmentInfo& alignment)
+{
+    using namespace illumina::common;
+
+    const unsigned querySize = querySeq.size();
+    const unsigned targetSize = targetSeq.size();
+    if (querySize >= targetSize) // the query must be strictly shorter than the target
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Unexpected split read alignment input."
+            << " querySize: " << querySize << " targetSize: " << targetSize << '\n'
+            << "querySeq:\n";
+        printSeq(querySeq,oss);
+        oss << '\n'
+            << "targetSeq:\n";
+        printSeq(targetSeq,oss);
+        oss << '\n';
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    // set the scanning start & end to make sure the candidate windows overlapping the breakpoint
+    const unsigned scanStart(std::max(0, static_cast<pos_t>(targetBpOffsetRange.begin_pos()) - static_cast<pos_t>(querySize) + 2));
+    const unsigned scanEnd(std::max(0, std::min((static_cast<pos_t>(targetBpOffsetRange.end_pos())), static_cast<pos_t>(targetSize - querySize)))); // also clamped so the query never runs off the end of targetSeq
+
+    const known_pos_range2 scoreRange(targetBpOffsetRange.begin_pos()-static_cast<pos_t>(flankScoreSize),
+                                      targetBpOffsetRange.end_pos()+static_cast<pos_t>(flankScoreSize)); // breakend range widened by flankScoreSize on each side
+
+#ifdef DEBUG_SRA
+    log_os << __FUNCTION__ << "query size = " << querySize << " target size = " << targetSize << '\n';
+    log_os << __FUNCTION__ << "targetBeginPos = " << targetBpOffsetRange.begin_pos() << '\n';
+    log_os << __FUNCTION__ << "scan start = " << scanStart << " scan end = " << scanEnd << '\n';
+#endif
+    if (scanEnd < scanStart)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: scanEnd < scanStart."
+            << " scanEnd: " << scanEnd << " scanStart: " << scanStart
+            << " querySize: " << querySize << " targetSize: " << targetSize << '\n'
+            << "\ttargetRange: " << targetBpOffsetRange << '\n';
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    // do one high-speed pass to find the optimal alignment (in terms of lhood), then compute all the goodies later:
+    float bestLnLhood(0);
+    unsigned bestPos(0);
+    {
+        bool isBest(false); // becomes true once the first placement has been scored
+        for (unsigned i = scanStart; i<= scanEnd; i++)
+        {
+            const float lnLhood(getLnLhood(querySeq, qualConvert, queryQual,
+                                           targetSeq, i, scoreRange, isBest, bestLnLhood));
+
+#ifdef DEBUG_SRA
+            log_os << __FUNCTION__ << "scanning: " << i << " lhood: " << lnLhood << " bestLnLhood " << bestLnLhood << " isBest " << isBest << " bestPos " << bestPos << '\n';
+#endif
+            if ( (! isBest) || (lnLhood > bestLnLhood)) // strict '>' keeps the leftmost placement on ties
+            {
+                bestLnLhood = lnLhood;
+                bestPos=i;
+                isBest=true;
+            }
+        }
+        assert(isBest);
+    }
+
+    assert(static_cast<pos_t>(bestPos) <= (targetBpOffsetRange.end_pos()+1));
+    if (static_cast<pos_t>(bestPos) <= (targetBpOffsetRange.begin_pos()+1))
+    {
+        alignment.leftSize = static_cast<pos_t>(targetBpOffsetRange.begin_pos()+1) - bestPos; // query bases at target offsets bestPos..begin_pos()
+    }
+    else
+    {
+        alignment.leftSize = 0; // the query starts to the right of begin_pos()
+    }
+
+    if (alignment.leftSize > querySize)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Unexpected split read alignment outcome. "
+            << " targetRange: " << targetBpOffsetRange << " bestPos: " << bestPos << " bestLnLhood: " << bestLnLhood << " querySize: " << querySize << " targetSize: " << targetSize << '\n'
+            << "alignment: " << alignment << "\n"
+            << "querySeq:\n";
+        printSeq(querySeq,oss);
+        oss << '\n'
+            << "targetSeq:\n";
+        printSeq(targetSeq,oss);
+        oss << '\n';
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+    alignment.homSize = std::min(querySize-alignment.leftSize,(static_cast<pos_t>(targetBpOffsetRange.end_pos()+1) - bestPos) - alignment.leftSize); // query bases up to end_pos() that are not in the left flank, capped by what remains of the query
+
+    if ((alignment.leftSize + alignment.homSize) < querySize)
+    {
+        alignment.rightSize = querySize - (alignment.leftSize + alignment.homSize); // whatever is left of the query
+    }
+    else
+    {
+        alignment.rightSize = 0;
+    }
+    alignment.alignLnLhood = bestLnLhood;
+    alignment.alignPos = bestPos;
+
+    calculateAlignScore(querySeq, targetSeq, bestPos, alignment);
+
+    // filtering the alignment and set evidence
+    setEvidence(alignment);
+
+#ifdef DEBUG_SRA
+    log_os << __FUNCTION__ << "bestpos: " << bestPos << " final alignment\n" << alignment << "\n";
+#endif
+}
diff --git a/src/c++/lib/applications/GenerateSVCandidates/SplitReadAlignment.hh b/src/c++/lib/applications/GenerateSVCandidates/SplitReadAlignment.hh
new file mode 100644
index 0000000..beeafc5
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/SplitReadAlignment.hh
@@ -0,0 +1,88 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+/// \author Felix Schlesinger
+///
+
+#pragma once
+
+#include "blt_util/known_pos_range2.hh"
+#include "blt_util/qscore_snp.hh"
+#include "htsapi/bam_record.hh"
+
+#include <cstdint>
+
+#include <string>
+#include <iosfwd>
+
+
+struct SRAlignmentInfo // result of scoring one read against one breakend (filled by splitReadAligner or getRefAlignment)
+{
+    unsigned alignPos = 0; ///< start offset of the alignment in the target/reference segment
+    unsigned leftSize = 0; ///< number of read bases on the left flank of the breakend range
+    unsigned homSize = 0; ///< number of read bases inside the breakend (microhomology) range
+    unsigned rightSize = 0; ///< number of read bases on the right flank of the breakend range
+    unsigned leftMismatches = 0; ///< mismatches counted on the left flank
+    unsigned homMismatches = 0; ///< mismatches counted inside the breakend range
+    unsigned rightMismatches = 0; ///< mismatches counted on the right flank
+    unsigned alignScore = 0; ///< aligned bases minus the three mismatch counts
+    float alignLnLhood = 0; ///< quality-aware ln-likelihood of the alignment
+
+    bool isEvidence = false; ///< alignment passed the evidence filters with the stricter flank-size threshold
+    bool isTier2Evidence = false; ///< alignment passed the evidence filters with the relaxed (tier 2) flank-size threshold
+    float evidence = 0; ///< 0 unless a tier passed; otherwise 2*min(leftSize,rightSize)/(leftSize+rightSize)
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SRAlignmentInfo& info);
+
+
+///
+/// \param[in] flankScoreSize the number of bases to score past the end of microhomology range
+///
+/// \param[in] targetBpOffsetRange this is the range of the breakend (accounting for microhomology) in targetSeq coordinates
+///
+/// TODO: need to add a query subset/length limit, so that as the query size goes up (ie. 2 x 400) we still consistently
+///       detect split read support without having to add more and more reference to the targetSeq
+///
+void
+splitReadAligner(
+    const unsigned flankScoreSize,
+    const std::string& querySeq,
+    const qscore_snp& qualConvert,
+    const uint8_t* queryQual,
+    const std::string& targetSeq,
+    const known_pos_range2& targetBpOffsetRange,
+    SRAlignmentInfo& alignment);
+
+/// Populate an SRAlignmentInfo object based on the existing alignment of the bamRead to the genomic region around this break-end.
+/// Scores the alignment based on (mis-)match counts and likelihood as the splitReadAligner.
+///
+/// \param[in] bpPos this is the range of the breakend (accounting for microhomology) in genome coordinates
+///
+void
+getRefAlignment(
+    const bam_record& bamRead,
+    const reference_contig_segment& bp1ref,
+    const known_pos_range2& bpPos,
+    const qscore_snp& qualConvert,
+    SRAlignmentInfo& alignment);
diff --git a/src/c++/lib/applications/GenerateSVCandidates/test/CMakeLists.txt b/src/c++/lib/applications/GenerateSVCandidates/test/CMakeLists.txt
new file mode 100644
index 0000000..5ce107a
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/test/CMakeLists.txt
@@ -0,0 +1,29 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+#set(ADDITIONAL_UNITTEST_LIB manta_manta)
+include(${THIS_CXX_TEST_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverBinTest.cpp b/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverBinTest.cpp
new file mode 100644
index 0000000..a24c033
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverBinTest.cpp
@@ -0,0 +1,285 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "applications/GenerateSVCandidates/EdgeRetrieverBin.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include "svgraph/test/SVLocusTestUtil.hh"
+
+#include <iostream>
+
+
+BOOST_AUTO_TEST_SUITE( test_EdgeRetrieverBin )
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverOneBin ) // with a single bin, every edge of every locus is returned in locus order
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.checkState(true,true);
+
+    EdgeRetrieverBin edger(set1, 0, 1, 0); // last two arguments: 1 bin in total, bin index 0
+
+    BOOST_REQUIRE( edger.next() );
+
+    EdgeInfo edge = edger.getEdge(); // first edge: locus 0
+    BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+    BOOST_REQUIRE( edger.next() );
+
+    edge = edger.getEdge(); // second edge: locus 1
+    BOOST_REQUIRE_EQUAL(edge.locusIndex, 1u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+    BOOST_REQUIRE( ! edger.next() ); // exhausted after two edges
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverManyBin ) // 4 edges split over 2 bins: each bin must return exactly 2
+{
+    SVLocus locus1; // one hub node linked to three other nodes => 3 edges
+    const NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    const NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(2,30,40));
+    locus1.linkNodes(nodePtr1, nodePtr2);
+    const NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(3,30,40));
+    locus1.linkNodes(nodePtr1, nodePtr3);
+    const NodeIndexType nodePtr4 = locus1.addNode(GenomeInterval(4,30,40));
+    locus1.linkNodes(nodePtr1, nodePtr4);
+    SVLocus locus4; // a single node pair => 1 edge
+    locusAddPair(locus4,7,10,20,8,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus4);
+    set1.checkState(true,true);
+
+    static const unsigned binTotal(2);
+    for (unsigned binIndex(0); binIndex<binTotal; ++binIndex)
+    {
+        EdgeRetrieverBin edger(set1, 0, binTotal, binIndex);
+
+        BOOST_REQUIRE( edger.next() );
+
+        EdgeInfo edge = edger.getEdge(); // first edge of this bin
+
+        if (binIndex == 0)
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+        }
+        else
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 3u);
+        }
+        BOOST_REQUIRE( edger.next() );
+
+        edge = edger.getEdge(); // second edge of this bin
+        if (binIndex == 0)
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 2u);
+        }
+        else
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 1u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+        }
+
+        BOOST_REQUIRE( ! edger.next() ); // each bin holds exactly two edges
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverManyBin2 ) // 6 single-edge loci over 3 bins: bin k must return loci 2k and 2k+1
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+    SVLocus locus3;
+    locusAddPair(locus3,5,10,20,6,30,40);
+    SVLocus locus4;
+    locusAddPair(locus4,7,10,20,8,30,40);
+    SVLocus locus5;
+    locusAddPair(locus5,9,10,20,10,30,40);
+    SVLocus locus6;
+    locusAddPair(locus6,11,10,20,12,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.merge(locus4);
+    set1.merge(locus5);
+    set1.merge(locus6);
+    set1.checkState(true,true);
+
+    for (unsigned binIndex(0); binIndex<3; ++binIndex)
+    {
+        EdgeRetrieverBin edger(set1, 0, 3, binIndex); // 3 bins in total
+
+        BOOST_REQUIRE( edger.next() );
+
+        EdgeInfo edge = edger.getEdge();
+        BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u+(binIndex*2)); // bins are contiguous runs of loci
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+        BOOST_REQUIRE( edger.next() );
+
+        edge = edger.getEdge();
+        BOOST_REQUIRE_EQUAL(edge.locusIndex, 1u+(binIndex*2));
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+        BOOST_REQUIRE( ! edger.next() ); // exactly two edges per bin
+    }
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverOddBin ) // 7 edges do not divide evenly into 3 bins: the bins together must still return all 7
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+    SVLocus locus3;
+    locusAddPair(locus3,5,10,20,6,30,40);
+    SVLocus locus4;
+    locusAddPair(locus4,7,10,20,8,30,40);
+    SVLocus locus5;
+    locusAddPair(locus5,9,10,20,10,30,40);
+    SVLocus locus6;
+    locusAddPair(locus6,11,10,20,12,30,40);
+    SVLocus locus7;
+    locusAddPair(locus7,13,10,20,14,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.merge(locus4);
+    set1.merge(locus5);
+    set1.merge(locus6);
+    set1.merge(locus7);
+    set1.checkState(true,true);
+
+    unsigned count(0); // edges seen across all bins
+    for (unsigned binIndex(0); binIndex<3; ++binIndex)
+    {
+        EdgeRetrieverBin edger(set1, 0, 3, binIndex);
+
+        while (edger.next())
+        {
+            count++;
+        }
+    }
+
+    BOOST_REQUIRE_EQUAL(count,7u);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverOddBinSelfEdge ) // as the odd-bin case but with self-edge loci: each locus must be returned exactly once
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,10,20, true); // both nodes cover the same interval
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,3,10,20, true);
+    SVLocus locus3;
+    locusAddPair(locus3,5,10,20,5,10,20, true);
+    SVLocus locus4;
+    locusAddPair(locus4,7,10,20,8,30,40);
+    SVLocus locus5;
+    locusAddPair(locus5,9,10,20,9,10,20, true);
+    SVLocus locus6;
+    locusAddPair(locus6,11,10,20,12,30,40);
+    SVLocus locus7;
+    locusAddPair(locus7,13,10,20,14,30,40);
+
+    locus1.mergeSelfOverlap(); // applied only to the loci whose two nodes coincide
+    locus2.mergeSelfOverlap();
+    locus3.mergeSelfOverlap();
+    locus5.mergeSelfOverlap();
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.merge(locus4);
+    set1.merge(locus5);
+    set1.merge(locus6);
+    set1.merge(locus7);
+    set1.checkState(true,true);
+
+    std::set<unsigned> loci; // locus indices already returned by some bin
+    unsigned count(0);
+    for (unsigned binIndex(0); binIndex<3; ++binIndex)
+    {
+        EdgeRetrieverBin edger(set1, 0, 3, binIndex);
+
+        while (edger.next())
+        {
+            const EdgeInfo& edge(edger.getEdge());
+            BOOST_REQUIRE_EQUAL(loci.count(edge.locusIndex),0u); // no locus may be returned twice
+            loci.insert(edge.locusIndex);
+            count++;
+        }
+    }
+
+    BOOST_REQUIRE_EQUAL(count,7u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverJumpBinTest.cpp b/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverJumpBinTest.cpp
new file mode 100644
index 0000000..e839ad2
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverJumpBinTest.cpp
@@ -0,0 +1,289 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "applications/GenerateSVCandidates/EdgeRetrieverJumpBin.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include "svgraph/test/SVLocusTestUtil.hh"
+
+#include <iostream>
+
+
+BOOST_AUTO_TEST_SUITE( test_EdgeRetrieverJumpBin )
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverJumpOneBin ) // with a single bin, every edge of every locus is returned in locus order
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.checkState(true,true);
+
+    EdgeRetrieverJumpBin edger(set1, 0, 1, 0); // last two arguments: 1 bin in total, bin index 0
+
+    BOOST_REQUIRE( edger.next() );
+
+    EdgeInfo edge = edger.getEdge(); // first edge: locus 0
+    BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+    BOOST_REQUIRE( edger.next() );
+
+    edge = edger.getEdge(); // second edge: locus 1
+    BOOST_REQUIRE_EQUAL(edge.locusIndex, 1u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+    BOOST_REQUIRE( ! edger.next() ); // exhausted after two edges
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverJumpManyBin ) // 4 edges over 2 bins: the expected edges alternate between the bins
+{
+    SVLocus locus1; // one hub node linked to three other nodes => 3 edges
+    const NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    const NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(2,30,40));
+    locus1.linkNodes(nodePtr1, nodePtr2);
+    const NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(3,30,40));
+    locus1.linkNodes(nodePtr1, nodePtr3);
+    const NodeIndexType nodePtr4 = locus1.addNode(GenomeInterval(4,30,40));
+    locus1.linkNodes(nodePtr1, nodePtr4);
+    SVLocus locus4; // a single node pair => 1 edge
+    locusAddPair(locus4,7,10,20,8,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus4);
+    set1.checkState(true,true);
+
+    static const unsigned binTotal(2);
+    for (unsigned binIndex(0); binIndex<binTotal; ++binIndex)
+    {
+        EdgeRetrieverJumpBin edger(set1, 0, binTotal, binIndex);
+
+        BOOST_REQUIRE( edger.next() );
+
+        EdgeInfo edge = edger.getEdge(); // first edge of this bin
+
+        if (binIndex == 0)
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+        }
+        else
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 2u);
+        }
+        BOOST_REQUIRE( edger.next() );
+
+        edge = edger.getEdge(); // second edge of this bin
+        if (binIndex == 0)
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 3u);
+        }
+        else
+        {
+            BOOST_REQUIRE_EQUAL(edge.locusIndex, 1u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+            BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+        }
+
+        BOOST_REQUIRE( ! edger.next() ); // each bin holds exactly two edges
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverJumpManyBin2 ) // 6 single-edge loci over 3 bins: bin k must return loci k and k+binTotal
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+    SVLocus locus3;
+    locusAddPair(locus3,5,10,20,6,30,40);
+    SVLocus locus4;
+    locusAddPair(locus4,7,10,20,8,30,40);
+    SVLocus locus5;
+    locusAddPair(locus5,9,10,20,10,30,40);
+    SVLocus locus6;
+    locusAddPair(locus6,11,10,20,12,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.merge(locus4);
+    set1.merge(locus5);
+    set1.merge(locus6);
+    set1.checkState(true,true);
+
+    static const unsigned binTotal(3);
+    for (unsigned binIndex(0); binIndex<binTotal; ++binIndex)
+    {
+        EdgeRetrieverJumpBin edger(set1, 0, binTotal, binIndex);
+
+        BOOST_REQUIRE( edger.next() );
+
+        EdgeInfo edge = edger.getEdge();
+        BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u+binIndex); // first locus of the bin is the bin index itself
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+        BOOST_REQUIRE( edger.next() );
+
+        edge = edger.getEdge();
+        BOOST_REQUIRE_EQUAL(edge.locusIndex, binTotal+binIndex); // next locus is binTotal further on (interleaved bins)
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+        BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+        BOOST_REQUIRE( ! edger.next() ); // exactly two edges per bin
+    }
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverJumpOddBin ) // 7 edges do not divide evenly into 3 bins: the bins together must still return all 7
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+    SVLocus locus3;
+    locusAddPair(locus3,5,10,20,6,30,40);
+    SVLocus locus4;
+    locusAddPair(locus4,7,10,20,8,30,40);
+    SVLocus locus5;
+    locusAddPair(locus5,9,10,20,10,30,40);
+    SVLocus locus6;
+    locusAddPair(locus6,11,10,20,12,30,40);
+    SVLocus locus7;
+    locusAddPair(locus7,13,10,20,14,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.merge(locus4);
+    set1.merge(locus5);
+    set1.merge(locus6);
+    set1.merge(locus7);
+    set1.checkState(true,true);
+
+    unsigned count(0); // edges seen across all bins
+    static const unsigned binTotal(3);
+    for (unsigned binIndex(0); binIndex<binTotal; ++binIndex)
+    {
+        EdgeRetrieverJumpBin edger(set1, 0, binTotal, binIndex);
+
+        while (edger.next())
+        {
+            count++;
+        }
+    }
+
+    BOOST_REQUIRE_EQUAL(count,7u);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverJumpOddBinSelfEdge ) // as the odd-bin case but with self-edge loci: each locus must be returned exactly once
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,10,20, true); // both nodes cover the same interval
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,3,10,20, true);
+    SVLocus locus3;
+    locusAddPair(locus3,5,10,20,5,10,20, true);
+    SVLocus locus4;
+    locusAddPair(locus4,7,10,20,8,30,40);
+    SVLocus locus5;
+    locusAddPair(locus5,9,10,20,9,10,20, true);
+    SVLocus locus6;
+    locusAddPair(locus6,11,10,20,12,30,40);
+    SVLocus locus7;
+    locusAddPair(locus7,13,10,20,14,30,40);
+
+    locus1.mergeSelfOverlap(); // applied only to the loci whose two nodes coincide
+    locus2.mergeSelfOverlap();
+    locus3.mergeSelfOverlap();
+    locus5.mergeSelfOverlap();
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.merge(locus4);
+    set1.merge(locus5);
+    set1.merge(locus6);
+    set1.merge(locus7);
+    set1.checkState(true,true);
+
+    std::set<unsigned> loci; // locus indices already returned by some bin
+    unsigned count(0);
+
+    static const unsigned binTotal(3);
+    for (unsigned binIndex(0); binIndex<binTotal; ++binIndex)
+    {
+        EdgeRetrieverJumpBin edger(set1, 0, binTotal, binIndex);
+
+        while (edger.next())
+        {
+            const EdgeInfo& edge(edger.getEdge());
+            BOOST_REQUIRE_EQUAL(loci.count(edge.locusIndex),0u); // no locus may be returned twice
+            loci.insert(edge.locusIndex);
+            count++;
+        }
+    }
+
+    BOOST_REQUIRE_EQUAL(count,7u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverLocusTest.cpp b/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverLocusTest.cpp
new file mode 100644
index 0000000..79f4fd1
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/test/EdgeRetrieverLocusTest.cpp
@@ -0,0 +1,69 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "applications/GenerateSVCandidates/EdgeRetrieverLocus.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include "svgraph/test/SVLocusTestUtil.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_EdgeRetrieverLocus )
+
+
+BOOST_AUTO_TEST_CASE( test_EdgeRetrieverLocusSimple )
+{
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.checkState(true,true);
+
+    BOOST_REQUIRE_EQUAL(set1.size(), 2u);  // the two loci must stay separate after merging
+
+    LocusEdgeOptions lopt;
+    lopt.locusIndex = 0;  // request locus 0; the assertions below expect only its edge
+    EdgeRetrieverLocus edger(set1, 0, lopt);
+
+    BOOST_REQUIRE( edger.next() );  // first (and only) edge must be available
+
+    EdgeInfo edge = edger.getEdge();
+    BOOST_REQUIRE_EQUAL(edge.locusIndex, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex1, 0u);
+    BOOST_REQUIRE_EQUAL(edge.nodeIndex2, 1u);
+
+    BOOST_REQUIRE(! edger.next() );  // nothing further may be returned
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/applications/GenerateSVCandidates/test/SVCandidateAssemblyRefinerTest.cpp b/src/c++/lib/applications/GenerateSVCandidates/test/SVCandidateAssemblyRefinerTest.cpp
new file mode 100644
index 0000000..e5e6b24
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/test/SVCandidateAssemblyRefinerTest.cpp
@@ -0,0 +1,75 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+// test static function in TU:
+#include "applications/GenerateSVCandidates/SVCandidateAssemblyRefiner.cpp"
+
+
+BOOST_AUTO_TEST_SUITE( test_SVRefiner )
+
+
+BOOST_AUTO_TEST_CASE( test_GetVariantRange )
+{
+    known_pos_range2 res;  // reused for the result of every getVariantRange call below
+
+    const std::string seq1("ABCDDABC");
+    const std::string seq2("ABCDDDABC");  // seq1 with one additional 'D'
+
+    {
+        // left shifted case:
+        const known_pos_range2 seq1Range(3,3);
+        const known_pos_range2 seq2Range(3,4);
+
+        // order reflects a deletion
+        res = getVariantRange(seq2,seq2Range,seq1,seq1Range);
+        BOOST_REQUIRE_EQUAL(res.begin_pos(), 0);
+        BOOST_REQUIRE_EQUAL(res.end_pos(), 2);
+
+        // order reflects an insertion
+        res = getVariantRange(seq1,seq1Range,seq2,seq2Range);
+        BOOST_REQUIRE_EQUAL(res.begin_pos(), 0);  // same range expected for either argument order
+        BOOST_REQUIRE_EQUAL(res.end_pos(), 2);
+    }
+
+    {
+        // right shifted case:
+        const known_pos_range2 seq1Range(5,5);
+        const known_pos_range2 seq2Range(5,6);
+
+        // order reflects a deletion
+        res = getVariantRange(seq2,seq2Range,seq1,seq1Range);
+        BOOST_REQUIRE_EQUAL(res.begin_pos(), -2);
+        BOOST_REQUIRE_EQUAL(res.end_pos(), 0);
+
+        // order reflects an insertion
+        res = getVariantRange(seq1,seq1Range,seq2,seq2Range);
+        BOOST_REQUIRE_EQUAL(res.begin_pos(), -2);  // same range expected for either argument order
+        BOOST_REQUIRE_EQUAL(res.end_pos(), 0);
+    }
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/applications/GenerateSVCandidates/test/test_main.cpp b/src/c++/lib/applications/GenerateSVCandidates/test/test_main.cpp
new file mode 100644
index 0000000..411b2de
--- /dev/null
+++ b/src/c++/lib/applications/GenerateSVCandidates/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libapplications
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/c++/lib/applications/GetAlignmentStats/AlignmentStatsOptions.cpp b/src/c++/lib/applications/GetAlignmentStats/AlignmentStatsOptions.cpp
new file mode 100644
index 0000000..c2e5112
--- /dev/null
+++ b/src/c++/lib/applications/GetAlignmentStats/AlignmentStatsOptions.cpp
@@ -0,0 +1,98 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#include "AlignmentStatsOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/ProgramUtil.hh"
+#include "options/AlignmentFileOptionsParser.hh"
+#include "options/optionsUtil.hh"
+
+#include "boost/program_options.hpp"
+
+#include <iostream>
+
+
+
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)  // optional message, forwarded unchanged
+{
+    usage(os, prog, visible, "get statistics for SV-calling from alignment files", " > stats", msg);  // six-argument overload declared elsewhere; supplies this tool's description and usage suffix
+}
+
+
+
+/// parse the command line into \p opt; prints usage and exits on help requests or parse errors
+void
+parseAlignmentStatsOptions(const illumina::Program& prog,
+                           int argc, char* argv[],
+                           AlignmentStatsOptions& opt)
+{
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("output-file", po::value(&opt.outputFilename),
+     "write stats to filename (default: stdout)");
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description aligndesc(getOptionsDescription(opt.alignFileOpt));
+
+    po::options_description visible("options");
+    visible.add(aligndesc).add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        // unix_style with the allow_short bit cleared (XOR removes it): short options are disabled
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        // usage() is handed the description, the " > stats" suffix and the
+        // option table, so that banner is not printed a second time here.
+        usage(log_os,prog,visible);
+        exit(EXIT_FAILURE);
+    }
+
+    // report alignment-file option problems through the same usage path
+    std::string errorMsg;
+    if (parseOptions(vm, opt.alignFileOpt, errorMsg))
+    {
+        usage(log_os,prog,visible,errorMsg.c_str());
+    }
+}
diff --git a/src/c++/lib/applications/GetAlignmentStats/AlignmentStatsOptions.hh b/src/c++/lib/applications/GetAlignmentStats/AlignmentStatsOptions.hh
new file mode 100644
index 0000000..4c69e2d
--- /dev/null
+++ b/src/c++/lib/applications/GetAlignmentStats/AlignmentStatsOptions.hh
@@ -0,0 +1,38 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#pragma once
+
+#include "common/Program.hh"
+#include "options/AlignmentFileOptions.hh"
+
+
+struct AlignmentStatsOptions
+{
+    AlignmentFileOptions alignFileOpt;  // input alignment file settings
+    std::string outputFilename;  // destination given by the "output-file" option
+};
+
+
+void
+parseAlignmentStatsOptions(const illumina::Program& prog,
+                           int argc, char* argv[],
+                           AlignmentStatsOptions& opt);
diff --git a/src/c++/lib/applications/GetAlignmentStats/CMakeLists.txt b/src/c++/lib/applications/GetAlignmentStats/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/GetAlignmentStats/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/GetAlignmentStats/GetAlignmentStats.cpp b/src/c++/lib/applications/GetAlignmentStats/GetAlignmentStats.cpp
new file mode 100644
index 0000000..00b6017
--- /dev/null
+++ b/src/c++/lib/applications/GetAlignmentStats/GetAlignmentStats.cpp
@@ -0,0 +1,66 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "GetAlignmentStats.hh"
+
+#include "AlignmentStatsOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/OutStream.hh"
+#include "manta/ReadGroupStatsUtil.hh"
+
+#include <cstdlib>
+
+#include <iostream>
+
+
+
+static
+void
+runAlignmentStats(const AlignmentStatsOptions& opt)
+{
+    // calculate fragment size statistics for all read groups in all bams
+
+    // fail early when no alignment files were specified
+    if (opt.alignFileOpt.alignmentFilename.empty())
+    {
+        log_os << "ERROR: No input files specified.\n";
+        exit(EXIT_FAILURE);
+    }
+
+    ReadGroupStatsSet rstats;  // shared accumulator filled from every input file
+    for (const std::string& file : opt.alignFileOpt.alignmentFilename)
+    {
+        extractReadGroupStatsFromBam(file,rstats);
+    }
+
+    rstats.save(opt.outputFilename.c_str());  // write the collected stats
+}
+
+
+void
+GetAlignmentStats::
+runInternal(int argc, char* argv[]) const
+{
+    AlignmentStatsOptions opt;
+
+    parseAlignmentStatsOptions(*this,argc,argv,opt);  // fills opt from the command line
+    runAlignmentStats(opt);  // then performs the actual stats extraction
+}
diff --git a/src/c++/lib/applications/GetAlignmentStats/GetAlignmentStats.hh b/src/c++/lib/applications/GetAlignmentStats/GetAlignmentStats.hh
new file mode 100644
index 0000000..631d94b
--- /dev/null
+++ b/src/c++/lib/applications/GetAlignmentStats/GetAlignmentStats.hh
@@ -0,0 +1,41 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// estimate per-library information from alignment file(s)
+///
+struct GetAlignmentStats : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "GetAlignmentStats";  // fixed program identifier
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;  // parses options, then runs the stats extraction
+};
diff --git a/src/c++/lib/applications/GetChromDepth/CMakeLists.txt b/src/c++/lib/applications/GetChromDepth/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/GetChromDepth/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/GetChromDepth/ChromDepthOptions.cpp b/src/c++/lib/applications/GetChromDepth/ChromDepthOptions.cpp
new file mode 100644
index 0000000..be2908b
--- /dev/null
+++ b/src/c++/lib/applications/GetChromDepth/ChromDepthOptions.cpp
@@ -0,0 +1,126 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+
+#include "ChromDepthOptions.hh"
+
+#include "blt_util/log.hh"
+#include "options/optionsUtil.hh"
+
+#include "boost/program_options.hpp"
+
+#include <iostream>
+#include "../../common/ProgramUtil.hh"
+
+typedef std::vector<std::string> chroms_t;
+
+
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)  // optional message, forwarded unchanged
+{
+    usage(os, prog, visible, "get chromosome depth information from alignment files", " [ > output ]", msg);  // six-argument overload declared elsewhere; supplies this tool's description and usage suffix
+}
+
+
+
+/// parse the command line into \p opt; prints usage and exits on help requests or parse errors
+void
+parseChromDepthOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    ChromDepthOptions& opt)
+{
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("align-file", po::value(&opt.alignmentFilename),
+     "alignment file in BAM or CRAM format")
+    ("chrom", po::value<chroms_t>(),
+     "chromosome name. May be supplied more than once. At least one entry required.")
+    ("output-file", po::value(&opt.outputFilename),
+     "write stats to filename (default: stdout)")
+    ;
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        // usage() is handed the description, usage suffix and option table, so the
+        // hand-written (and slightly different) copy of that banner is dropped.
+        usage(log_os,prog,visible);
+        exit(EXIT_FAILURE);
+    }
+
+    if (vm.count("chrom"))
+    {
+        opt.chromNames=vm["chrom"].as<chroms_t>();
+    }
+
+    std::string errorMsg;
+    if      (checkStandardizeInputFile(opt.alignmentFilename, "alignment", errorMsg))
+    {
+        // presumably errorMsg was filled in by the check itself -- nothing to add
+    }
+    else if (opt.chromNames.empty())
+    {
+        errorMsg = "Need at least one chromosome name";
+    }
+    else
+    {
+        for (const std::string& chrom : opt.chromNames)
+        {
+            if (chrom.empty())
+            {
+                errorMsg = "Empty chromosome name";
+                break;
+            }
+        }
+    }
+
+    if (! errorMsg.empty())
+    {
+        usage(log_os, prog, visible, errorMsg.c_str());
+    }
+}
diff --git a/src/c++/lib/applications/GetChromDepth/ChromDepthOptions.hh b/src/c++/lib/applications/GetChromDepth/ChromDepthOptions.hh
new file mode 100644
index 0000000..f499b4f
--- /dev/null
+++ b/src/c++/lib/applications/GetChromDepth/ChromDepthOptions.hh
@@ -0,0 +1,41 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "../../common/Program.hh"
+
+
+struct ChromDepthOptions
+{
+    std::string alignmentFilename;  // value of the "align-file" option
+    std::vector<std::string> chromNames;  // values of the repeatable "chrom" option
+    std::string outputFilename;  // value of the "output-file" option
+};
+
+
+void
+parseChromDepthOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    ChromDepthOptions& opt);
diff --git a/src/c++/lib/applications/GetChromDepth/GetChromDepth.cpp b/src/c++/lib/applications/GetChromDepth/GetChromDepth.cpp
new file mode 100644
index 0000000..17b3a1b
--- /dev/null
+++ b/src/c++/lib/applications/GetChromDepth/GetChromDepth.cpp
@@ -0,0 +1,69 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "GetChromDepth.hh"
+#include "ChromDepthOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/OutStream.hh"
+#include "manta/ReadChromDepthUtil.hh"
+
+#include <cstdlib>
+
+#include <iomanip>
+#include <iostream>
+
+
+
+static
+void
+getChromDepth(const ChromDepthOptions& opt)
+{
+    // check that we have write permission on the output file early:
+    {
+        OutStream outs(opt.outputFilename);  // scoped: goes out of scope again right away
+    }
+
+    std::vector<double> chromDepth;  // one entry per name in opt.chromNames, same order
+    for (const std::string& chrom : opt.chromNames)
+    {
+        chromDepth.push_back(readChromDepthFromAlignment(opt.alignmentFilename, chrom));
+    }
+
+    OutStream outs(opt.outputFilename);  // opened again for the actual report
+    std::ostream& os(outs.getStream());
+
+    const unsigned chromCount(opt.chromNames.size());
+    for (unsigned chromIndex(0); chromIndex<chromCount; ++chromIndex)  // one "name<TAB>depth" line each, depth with two decimals
+    {
+        os << opt.chromNames[chromIndex] << "\t" << std::fixed << std::setprecision(2) << chromDepth[chromIndex] << "\n";
+    }
+}
+
+
+void
+GetChromDepth::
+runInternal(int argc, char* argv[]) const
+{
+    ChromDepthOptions opt;
+
+    parseChromDepthOptions(*this,argc,argv,opt);  // fills opt from the command line
+    getChromDepth(opt);  // then computes and writes the per-chromosome depth
+}
diff --git a/src/c++/lib/applications/GetChromDepth/GetChromDepth.hh b/src/c++/lib/applications/GetChromDepth/GetChromDepth.hh
new file mode 100644
index 0000000..a62036f
--- /dev/null
+++ b/src/c++/lib/applications/GetChromDepth/GetChromDepth.hh
@@ -0,0 +1,41 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// get chromosome depth information from an alignment file
+///
+struct GetChromDepth : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "GetChromDepth";  // fixed program identifier
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;  // parses options, then reports the depth per chromosome
+};
diff --git a/src/c++/lib/applications/MergeAlignmentStats/CMakeLists.txt b/src/c++/lib/applications/MergeAlignmentStats/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/MergeAlignmentStats/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStats.cpp b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStats.cpp
new file mode 100644
index 0000000..79a071f
--- /dev/null
+++ b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStats.cpp
@@ -0,0 +1,64 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "MergeAlignmentStatsOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/OutStream.hh"
+#include "manta/ReadGroupStatsUtil.hh"
+
+#include <cstdlib>
+
+#include <iostream>
+#include "MergeAlignmentStats.hh"
+
+
+
+static
+void
+mergeAlignmentStats(const MergeAlignmentStatsOptions& opt)
+{
+    if (opt.statsFiles.empty())  // nothing to merge: report and stop
+    {
+        log_os << "ERROR: No input files specified.\n";
+        exit(EXIT_FAILURE);
+    }
+
+    ReadGroupStatsSet all_rstats;  // receives the content of every input file
+    for (const std::string& file : opt.statsFiles)
+    {
+        ReadGroupStatsSet rstats;  // fresh set for each file
+        rstats.load(file.c_str());
+        all_rstats.merge(rstats);
+    }
+
+    all_rstats.save(opt.outputFilename.c_str());  // write the merged result
+}
+
+
+void
+MergeAlignmentStats::
+runInternal(int argc, char* argv[]) const
+{
+    MergeAlignmentStatsOptions opt;
+
+    parseMergeAlignmentStatsOptions(*this,argc,argv,opt);  // fills opt from the command line
+    mergeAlignmentStats(opt);  // then loads, merges and saves the stats
+}
diff --git a/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStats.hh b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStats.hh
new file mode 100644
index 0000000..634cf37
--- /dev/null
+++ b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStats.hh
@@ -0,0 +1,41 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// merge multiple alignment stats files (the output of 'GetAlignmentStats')
+///
+struct MergeAlignmentStats : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "MergeAlignmentStats";  // fixed program identifier
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;  // parses options, then merges the stats files
+};
diff --git a/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStatsOptions.cpp b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStatsOptions.cpp
new file mode 100644
index 0000000..a86753f
--- /dev/null
+++ b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStatsOptions.cpp
@@ -0,0 +1,136 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+
+#include "MergeAlignmentStatsOptions.hh"
+#include "blt_util/log.hh"
+#include "options/AlignmentFileOptionsParser.hh"
+#include "options/optionsUtil.hh"
+
+#include "boost/program_options.hpp"
+
+#include <iostream>
+#include "../../common/ProgramUtil.hh"
+
+
+typedef std::vector<std::string> files_t;
+
+
+
+static void
+usage(
+    std::ostream& os, const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    // delegate to the shared multi-argument usage() overload with this program's text
+    usage(os, prog, visible,
+          "merge multiple sv-calling statistics files from 'GetAlignmentStats'", " > stats", msg);
+}
+
+
+
+void
+parseMergeAlignmentStatsOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    MergeAlignmentStatsOptions& opt)
+{
+    // Parse the command line into opt, then validate the input stats file list.
+    //
+    // Every failure path is reported through usage(), which is expected to terminate
+    // the program (the other option parsers in this library rely on that as well),
+    // so no separate help text or exit call follows it.
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("align-stats-file", po::value<files_t>(),
+     "stats output of 'GetAlignmentStats' (may be specified multiple times)")
+    ("output-file", po::value(&opt.outputFilename),
+     "write merged stats to filename (default: stdout)");
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        usage(log_os,prog,visible);
+    }
+
+    std::string errorMsg;
+    {
+        {
+            files_t statsInput;
+            if (vm.count("align-stats-file"))
+            {
+                statsInput=(boost::any_cast<files_t>(vm["align-stats-file"].value()));
+            }
+            opt.statsFiles = statsInput;
+        }
+
+        errorMsg.clear();
+        if (opt.statsFiles.empty())
+        {
+            errorMsg="Must specify at least one input stats file";
+        }
+        else
+        {
+            // check that stats files exist, and names do not repeat
+            std::set<std::string> nameCheck;
+            for (std::string& afile : opt.statsFiles)
+            {
+                if (checkStandardizeInputFile(afile,"alignment stats file",errorMsg)) break;
+                if (nameCheck.count(afile))
+                {
+                    std::ostringstream oss;
+                    oss << "Repeated alignment stats filename: " << afile << "\n";
+                    errorMsg = oss.str();
+                    break;
+                }
+                nameCheck.insert(afile);
+            }
+        }
+
+    }
+
+    if (! errorMsg.empty())
+    {
+        usage(log_os,prog,visible,errorMsg.c_str());
+    }
+}
diff --git a/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStatsOptions.hh b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStatsOptions.hh
new file mode 100644
index 0000000..43ba4c0
--- /dev/null
+++ b/src/c++/lib/applications/MergeAlignmentStats/MergeAlignmentStatsOptions.hh
@@ -0,0 +1,40 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "../../common/Program.hh"
+
+/// command-line options of the MergeAlignmentStats program
+struct MergeAlignmentStatsOptions
+{
+    std::vector<std::string> statsFiles; ///< input stats files (from --align-stats-file)
+    std::string outputFilename; ///< merged stats output (from --output-file)
+};
+
+/// parse the command line (argc/argv) of program prog into opt
+void
+parseMergeAlignmentStatsOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    MergeAlignmentStatsOptions& opt);
diff --git a/src/c++/lib/applications/MergeEdgeStats/CMakeLists.txt b/src/c++/lib/applications/MergeEdgeStats/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/MergeEdgeStats/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/MergeEdgeStats/MESOptions.cpp b/src/c++/lib/applications/MergeEdgeStats/MESOptions.cpp
new file mode 100644
index 0000000..3983ebc
--- /dev/null
+++ b/src/c++/lib/applications/MergeEdgeStats/MESOptions.cpp
@@ -0,0 +1,153 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "MESOptions.hh"
+#include "blt_util/log.hh"
+#include "common/ProgramUtil.hh"
+
+#include "boost/filesystem.hpp"
+#include "boost/program_options.hpp"
+
+#include <iostream>
+#include <fstream>
+#include <set>
+#include <sstream>
+
+
+
+static void
+usage(
+    std::ostream& os, const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    // delegate to the shared multi-argument usage() overload, supplying the
+    // description text specific to this program
+    usage(os, prog, visible, "merge sv locus edge stats", "", msg);
+}
+
+
+
+void
+parseMESOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    MESOptions& opt)
+{
+    // Parse the command line into opt, then validate the inputs and the output.
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("stats-file", po::value(&opt.statsFilename),
+     "input sv edge stats file (may be specified multiple times)")
+    ("stats-file-list", po::value(&opt.statsFilenameList),
+     "file listing all input sv edge stats files, one filename per line (specified only once)")
+    ("output-file", po::value(&opt.outputFilename),
+     "merged output sv edge stats file (required)")
+    ("report-file", po::value(&opt.reportFilename),
+     "provide a summary report based on the merged edge stats");
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    po::variables_map vm;
+    bool isParseFailure(false);
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)
+    {
+        // todo:: find out what is the more specific exception class thrown by program options
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        isParseFailure=true;
+    }
+
+    const bool isHelpRequested(vm.count("help") != 0);
+    if (isParseFailure || isHelpRequested || (argc<=1))
+    {
+        usage(log_os,prog,visible);
+    }
+
+    // append filenames read from the user-provided list file to any filenames
+    // given directly on the command line
+    if (! opt.statsFilenameList.empty())
+    {
+        std::ifstream listStream(opt.statsFilenameList.c_str(), std::ios_base::in | std::ios_base::binary);
+        if (! listStream.good())
+        {
+            std::ostringstream osfl;
+            osfl << "Stats file list does not exist: '" << opt.statsFilenameList << "'";
+            usage(log_os, prog, visible, osfl.str().c_str());
+        }
+
+        std::string line;
+        while (getline(listStream, line))
+        {
+            // the list is opened in binary mode: drop one trailing '\r' (as left by
+            // CRLF line endings), then skip lines which have no content left
+            if ((! line.empty()) && (line.back() == '\r')) line.pop_back();
+            if (line.empty()) continue;
+            opt.statsFilename.push_back(line);
+        }
+    }
+
+    // at least one input is required, from either option
+    if (opt.statsFilename.empty())
+    {
+        usage(log_os,prog,visible, "Must specify at least 1 input sv edge stats file");
+    }
+
+    // every input must exist and must be named only once
+    std::set<std::string> seenFilenames;
+    for (const std::string& statsFilename : opt.statsFilename)
+    {
+        if (! boost::filesystem::exists(statsFilename))
+        {
+            std::ostringstream oss;
+            oss << "SV edge stats file does not exist: '" << statsFilename << "'";
+            usage(log_os,prog,visible,oss.str().c_str());
+        }
+
+        // insert().second is false when the name is already in the set
+        if (! seenFilenames.insert(statsFilename).second)
+        {
+            std::ostringstream oss;
+            oss << "Same SV edge stats file submitted multiple times: '" << statsFilename << "'";
+            usage(log_os,prog,visible,oss.str().c_str());
+        }
+    }
+
+    if (opt.outputFilename.empty())
+    {
+        usage(log_os,prog,visible, "Must specify sv edges stats output file");
+    }
+}
+
diff --git a/src/c++/lib/applications/MergeEdgeStats/MESOptions.hh b/src/c++/lib/applications/MergeEdgeStats/MESOptions.hh
new file mode 100644
index 0000000..a89165b
--- /dev/null
+++ b/src/c++/lib/applications/MergeEdgeStats/MESOptions.hh
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+#include <string>
+#include <vector>
+
+/// command-line options of the MergeEdgeStats program
+struct MESOptions
+{
+    std::vector<std::string> statsFilename; ///< input sv edge stats files (--stats-file and --stats-file-list entries)
+    std::string statsFilenameList; ///< file listing input stats filenames (--stats-file-list)
+    std::string outputFilename; ///< merged stats output file (--output-file)
+    std::string reportFilename; ///< optional summary report file (--report-file)
+};
+
+/// parse the command line (argc/argv) of program prog into opt
+void
+parseMESOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    MESOptions& opt);
diff --git a/src/c++/lib/applications/MergeEdgeStats/MergeEdgeStats.cpp b/src/c++/lib/applications/MergeEdgeStats/MergeEdgeStats.cpp
new file mode 100644
index 0000000..d22b661
--- /dev/null
+++ b/src/c++/lib/applications/MergeEdgeStats/MergeEdgeStats.cpp
@@ -0,0 +1,82 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "MergeEdgeStats.hh"
+#include "MESOptions.hh"
+#include "appstats/GSCEdgeStats.hh"
+#include "common/OutStream.hh"
+
+#include "blt_util/log.hh"
+
+
+
+static
+void
+runMES(const MESOptions& opt)
+{
+    {
+        // construct (and discard) a stream for each requested output before any
+        // merging is done: early test that we have permission to write to them
+        OutStream outs(opt.outputFilename);
+        if (! opt.reportFilename.empty())
+        {
+            OutStream reps(opt.reportFilename);
+        }
+    }
+
+    // the first file is loaded directly into the result, and each later file
+    // is loaded into a temporary which is then merged into the result
+    GSCEdgeStats mergedStats;
+    bool isMergedLoaded(false);
+    for (const std::string& statsFilename : opt.statsFilename)
+    {
+        if (isMergedLoaded)
+        {
+            GSCEdgeStats inputStats;
+            inputStats.load(statsFilename.c_str());
+            mergedStats.merge(inputStats);
+            continue;
+        }
+        mergedStats.load(statsFilename.c_str());
+        isMergedLoaded=true;
+    }
+
+    mergedStats.save(opt.outputFilename.c_str());
+    if (! opt.reportFilename.empty())
+    {
+        mergedStats.report(opt.reportFilename.c_str());
+    }
+}
+
+
+
+void
+MergeEdgeStats::
+runInternal(int argc, char* argv[]) const
+{
+    // fill the options struct from the command line, then run the merge with it
+    MESOptions mesOptions;
+    parseMESOptions(*this, argc, argv, mesOptions);
+    runMES(mesOptions);
+}
diff --git a/src/c++/lib/applications/MergeEdgeStats/MergeEdgeStats.hh b/src/c++/lib/applications/MergeEdgeStats/MergeEdgeStats.hh
new file mode 100644
index 0000000..3236e11
--- /dev/null
+++ b/src/c++/lib/applications/MergeEdgeStats/MergeEdgeStats.hh
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// Program which merges multiple sv edge stats files into a single stats file
+struct MergeEdgeStats : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "MergeEdgeStats";
+    }
+    /// parse the command-line options, then run the merge
+    void
+    runInternal(int argc, char* argv[]) const;
+};
+
diff --git a/src/c++/lib/applications/MergeSVLoci/CMakeLists.txt b/src/c++/lib/applications/MergeSVLoci/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/MergeSVLoci/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/MergeSVLoci/MSLOptions.cpp b/src/c++/lib/applications/MergeSVLoci/MSLOptions.cpp
new file mode 100644
index 0000000..a83d1c6
--- /dev/null
+++ b/src/c++/lib/applications/MergeSVLoci/MSLOptions.cpp
@@ -0,0 +1,151 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "MSLOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/ProgramUtil.hh"
+
+#include "boost/filesystem.hpp"
+#include "boost/program_options.hpp"
+
+#include <iostream>
+#include <fstream>
+#include <set>
+#include <sstream>
+
+
+
+static void
+usage(
+    std::ostream& os, const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    // delegate to the shared multi-argument usage() overload, supplying the
+    // description text specific to this program
+    usage(os, prog, visible, "merge sv locus graphs", "", msg);
+}
+
+
+
+void
+parseMSLOptions(const illumina::Program& prog,
+                int argc, char* argv[],
+                MSLOptions& opt)
+{
+    // Parse the command line into opt, then validate the inputs and the output.
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("graph-file", po::value(&opt.graphFilename),
+     "input sv locus graph file (may be specified multiple times)")
+    ("graph-file-list", po::value(&opt.graphFilenameList),
+     "file listing all input sv locus graph files, one filename per line (specified only once)")
+    ("output-file", po::value(&opt.outputFilename),
+     "merged output sv locus graph file")
+    ("verbose", po::value(&opt.isVerbose)->zero_tokens(),
+     "provide additional progress logging");
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    po::variables_map vm;
+    bool isParseFailure(false);
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)
+    {
+        // todo:: find out what is the more specific exception class thrown by program options
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        isParseFailure=true;
+    }
+
+    const bool isHelpRequested(vm.count("help") != 0);
+    if (isParseFailure || isHelpRequested || (argc<=1))
+    {
+        usage(log_os,prog,visible);
+    }
+
+    // append filenames read from the user-provided list file to opt.graphFilename
+    if (! opt.graphFilenameList.empty())
+    {
+        std::ifstream listStream(opt.graphFilenameList.c_str(), std::ios_base::in | std::ios_base::binary);
+        if (! listStream.good())
+        {
+            std::ostringstream osfl;
+            osfl << "SV locus graph file list does not exist: '" << opt.graphFilenameList << "'";
+            usage(log_os, prog, visible, osfl.str().c_str());
+        }
+
+        std::string line;
+        while (getline(listStream, line))
+        {
+            // the list is opened in binary mode: drop one trailing '\r' (as left by
+            // CRLF line endings), then skip lines which have no content left
+            if ((! line.empty()) && (line.back() == '\r')) line.pop_back();
+            if (line.empty()) continue;
+            opt.graphFilename.push_back(line);
+        }
+    }
+
+    // at least one input is required, from either option
+    if (opt.graphFilename.empty())
+    {
+        usage(log_os,prog,visible, "Must specify at least 1 input sv locus graph file");
+    }
+
+    // every input must exist and must be named only once
+    std::set<std::string> seenFilenames;
+    for (const std::string& graphFilename : opt.graphFilename)
+    {
+        if (! boost::filesystem::exists(graphFilename))
+        {
+            std::ostringstream oss;
+            oss << "SV locus graph file does not exist: '" << graphFilename << "'";
+            usage(log_os,prog,visible,oss.str().c_str());
+        }
+
+        // insert().second is false when the name is already in the set
+        if (! seenFilenames.insert(graphFilename).second)
+        {
+            std::ostringstream oss;
+            oss << "Same SV locus graph file submitted multiple times: '" << graphFilename << "'";
+            usage(log_os,prog,visible,oss.str().c_str());
+        }
+    }
+    if (opt.outputFilename.empty())
+    {
+        usage(log_os,prog,visible, "Must specify a graph output file");
+    }
+}
+
diff --git a/src/c++/lib/applications/MergeSVLoci/MSLOptions.hh b/src/c++/lib/applications/MergeSVLoci/MSLOptions.hh
new file mode 100644
index 0000000..593f413
--- /dev/null
+++ b/src/c++/lib/applications/MergeSVLoci/MSLOptions.hh
@@ -0,0 +1,49 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+#include <string>
+#include <vector>
+
+/// command-line options of the MergeSVLoci program
+struct MSLOptions
+{
+    MSLOptions() :
+        isVerbose(false)
+    {}
+
+    std::vector<std::string> graphFilename; ///< input sv locus graph files (--graph-file and --graph-file-list entries)
+    std::string graphFilenameList; ///< file listing input graph filenames (--graph-file-list)
+    std::string outputFilename; ///< merged graph output file (--output-file)
+    bool isVerbose; ///< enable additional progress logging (--verbose), off by default
+};
+
+/// parse the command line (argc/argv) of program prog into opt
+void
+parseMSLOptions(const illumina::Program& prog,
+                int argc, char* argv[],
+                MSLOptions& opt);
diff --git a/src/c++/lib/applications/MergeSVLoci/MergeSVLoci.cpp b/src/c++/lib/applications/MergeSVLoci/MergeSVLoci.cpp
new file mode 100644
index 0000000..b2033db
--- /dev/null
+++ b/src/c++/lib/applications/MergeSVLoci/MergeSVLoci.cpp
@@ -0,0 +1,91 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "MergeSVLoci.hh"
+#include "MSLOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/OutStream.hh"
+#include "svgraph/SVLocusSet.hh"
+
+
+
+static
+void
+runMSL(const MSLOptions& opt)
+{
+    TimeTracker mergeTimer;
+    mergeTimer.resume();
+    {
+        // early test that we have permission to write to output file
+        OutStream outs(opt.outputFilename);
+    }
+
+    SVLocusSet mergedSet;
+    for (const std::string& graphFilename : opt.graphFilename)
+    {
+        if (opt.isVerbose)
+        {
+            log_os << "INFO: Merging file: '" << graphFilename << "'\n";
+        }
+
+        if (! mergedSet.empty())
+        {
+            // merge via a temporary set once the result already has content
+            SVLocusSet inputSet;
+            inputSet.load(graphFilename.c_str());
+            mergedSet.merge(inputSet);
+        }
+        else
+        {
+            mergedSet.load(graphFilename.c_str());
+        }
+
+        if (opt.isVerbose)
+        {
+            log_os << "INFO: Finished merging file: '" << graphFilename << "'\n";
+        }
+    }
+
+    mergedSet.finalize();
+    if (opt.isVerbose)
+    {
+        log_os << "INFO: Finished cleaning merged graph.\n";
+    }
+    mergeTimer.stop();
+    mergedSet.setMergeTime(mergeTimer.getTimes());
+    mergedSet.save(opt.outputFilename.c_str());
+}
+
+
+
+void
+MergeSVLoci::
+runInternal(int argc, char* argv[]) const
+{
+    // fill the options struct from the command line, then run the merge with it
+    MSLOptions mslOptions;
+    parseMSLOptions(*this, argc, argv, mslOptions);
+    runMSL(mslOptions);
+}
diff --git a/src/c++/lib/applications/MergeSVLoci/MergeSVLoci.hh b/src/c++/lib/applications/MergeSVLoci/MergeSVLoci.hh
new file mode 100644
index 0000000..047720e
--- /dev/null
+++ b/src/c++/lib/applications/MergeSVLoci/MergeSVLoci.hh
@@ -0,0 +1,43 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// Program which merges multiple sv locus graph files into a single sv locus
+/// graph file
+struct MergeSVLoci : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "MergeSVLoci";
+    }
+    /// parse the command-line options, then run the merge
+    void
+    runInternal(int argc, char* argv[]) const;
+};
+
diff --git a/src/c++/lib/applications/SummarizeAlignmentStats/CMakeLists.txt b/src/c++/lib/applications/SummarizeAlignmentStats/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeAlignmentStats/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/SummarizeAlignmentStats/SASOptions.cpp b/src/c++/lib/applications/SummarizeAlignmentStats/SASOptions.cpp
new file mode 100644
index 0000000..bc177c2
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeAlignmentStats/SASOptions.cpp
@@ -0,0 +1,112 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SASOptions.hh"
+
+#include "blt_util/log.hh"
+#include "options/optionsUtil.hh"
+
+#include "boost/program_options.hpp"
+
+#include <iostream>
+#include "../../common/ProgramUtil.hh"
+
+
+
+static void
+usage(
+    std::ostream& os, const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)
+{
+    // delegate to the shared multi-argument usage() overload, supplying the
+    // description text specific to this program
+    usage(os, prog, visible, "summarize fragment size stats", " [ > output]", msg);
+}
+
+
+
+static
+void
+checkStandardizeUsageFile(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    std::string& filename,
+    const char* fileLabel)
+{
+    // a true result from checkStandardizeInputFile is handled as a failure here,
+    // and the message it stored is reported through usage()
+    std::string inputFileError;
+    if (! checkStandardizeInputFile(filename, fileLabel, inputFileError)) return;
+    usage(os,prog,visible,inputFileError.c_str());
+}
+
+
+
+void
+parseSASOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    SASOptions& opt)
+{
+    // Parse the command line into opt, then check the alignment stats input file.
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("align-stats", po::value(&opt.statsFilename),
+     "pre-computed alignment statistics for the input alignment files (required)")
+    ("output-file", po::value(&opt.outputFilename),
+     "write fragment summary stats to filename (default: stdout)");
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        usage(log_os,prog,visible);
+    }
+
+    // any problem reported for the stats file is passed on to usage()
+    checkStandardizeUsageFile(log_os,prog,visible,opt.statsFilename,"alignment statistics");
+}
+
diff --git a/src/c++/lib/applications/SummarizeAlignmentStats/SASOptions.hh b/src/c++/lib/applications/SummarizeAlignmentStats/SASOptions.hh
new file mode 100644
index 0000000..ec52720
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeAlignmentStats/SASOptions.hh
@@ -0,0 +1,43 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <string>
+#include "../../common/Program.hh"
+
+
+struct SASOptions
+{
+    std::string statsFilename;   // --align-stats: pre-computed alignment statistics file
+    std::string outputFilename;  // --output-file: report destination; the option help gives stdout as the default
+};
+
+
+void
+parseSASOptions(
+    const illumina::Program& prog,   // forwarded to usage() when the command line is rejected
+    int argc, char* argv[],          // raw command line
+    SASOptions& opt);                // receives the parsed option values
+
diff --git a/src/c++/lib/applications/SummarizeAlignmentStats/SummarizeAlignmentStats.cpp b/src/c++/lib/applications/SummarizeAlignmentStats/SummarizeAlignmentStats.cpp
new file mode 100644
index 0000000..4501671
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeAlignmentStats/SummarizeAlignmentStats.cpp
@@ -0,0 +1,81 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SummarizeAlignmentStats.hh"
+#include "SASOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/OutStream.hh"
+#include "manta/ReadGroupStatsSet.hh"
+
+#include <iostream>
+
+
+
+static
+void
+runSAS(const SASOptions& opt)
+{
+    // quantiles of the fragment length distribution reported for every group
+    static const float reportedQuantiles[] = { 0.01f, 0.05f, 0.10f, 0.25f, 0.50f, 0.75f, 0.90f, 0.95f, 0.99f };
+
+    OutStream outputFile(opt.outputFilename);
+    std::ostream& out(outputFile.getStream());
+
+    ReadGroupStatsSet statsSet;
+    statsSet.load(opt.statsFilename.c_str());
+
+    const unsigned totalGroups(statsSet.size());
+    for (unsigned g(0); g != totalGroups; ++g)
+    {
+        const ReadGroupStatsSet::KeyType& groupKey(statsSet.getKey(g));
+#ifdef READ_GROUPS
+        out << "bamFile:\t" << groupKey.bamLabel << '\n'
+            << "readGroup:\t" << groupKey.rgLabel << '\n';
+#else
+        out << "group:\t" << groupKey.bamLabel << '\n';
+#endif
+
+        const ReadGroupStats& groupStats(statsSet.getStats(g));
+        out << "fragment length observations:\t" << groupStats.fragStats.totalObservations() << '\n'
+            << "fragment length quantiles:\n";
+        for (const float level : reportedQuantiles)
+        {
+            out << level << '\t' << groupStats.fragStats.quantile(level) << '\n';
+        }
+        out << '\n';
+    }
+}
+
+
+
+void
+SummarizeAlignmentStats::
+runInternal(int argc, char* argv[]) const
+{
+    SASOptions opt;
+    // parse the command line into opt, then write the fragment length report
+    parseSASOptions(*this,argc,argv,opt);
+    runSAS(opt);
+}
diff --git a/src/c++/lib/applications/SummarizeAlignmentStats/SummarizeAlignmentStats.hh b/src/c++/lib/applications/SummarizeAlignmentStats/SummarizeAlignmentStats.hh
new file mode 100644
index 0000000..9ce12e9
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeAlignmentStats/SummarizeAlignmentStats.hh
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// report the fragment length observation count and quantiles of each group
+/// stored in a pre-computed alignment statistics file
+struct SummarizeAlignmentStats : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "SummarizeAlignmentStats";
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;   // parses argv, then writes the report; defined in SummarizeAlignmentStats.cpp
+};
diff --git a/src/c++/lib/applications/SummarizeSVLoci/CMakeLists.txt b/src/c++/lib/applications/SummarizeSVLoci/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeSVLoci/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/SummarizeSVLoci/SSLOptions.cpp b/src/c++/lib/applications/SummarizeSVLoci/SSLOptions.cpp
new file mode 100644
index 0000000..fdb883d
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeSVLoci/SSLOptions.cpp
@@ -0,0 +1,102 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SSLOptions.hh"
+
+#include "blt_util/log.hh"
+#include "boost/filesystem.hpp"
+#include "boost/program_options.hpp"
+
+#include <iostream>
+#include "../../common/ProgramUtil.hh"
+
+
+
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)   // optional extra message, passed through unchanged
+{   // forwards to the six-argument usage() with this tool's description and usage-line suffix
+    usage(os, prog, visible, "write graph summary stats to stdout (tsv format)", " [ > output ]", msg);
+}
+
+
+
+void
+parseSSLOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    SSLOptions& opt)
+{
+    // Fill opt from the command line; --help, an empty command line or a parse error end in usage().
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("graph-file", po::value(&opt.graphFilename),
+     "sv locus graph file")
+    ("global", po::value(&opt.isGlobalStats)->zero_tokens(),
+     "provide global stats on full graph (default output is per-locus stats)")
+    ("output-file", po::value(&opt.outputFilename),
+     "write graph summary stats to filename (default: stdout)")
+    ;
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description visible("options");
+    visible.add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        usage(log_os,prog,visible);
+    }
+    // fast check of config state:
+    if (opt.graphFilename.empty())
+    {
+        usage(log_os,prog,visible,"Must specify sv locus graph file");
+    }
+    if (! boost::filesystem::exists(opt.graphFilename))
+    {
+        usage(log_os,prog,visible,"SV locus graph file does not exist");
+    }
+}
+
diff --git a/src/c++/lib/applications/SummarizeSVLoci/SSLOptions.hh b/src/c++/lib/applications/SummarizeSVLoci/SSLOptions.hh
new file mode 100644
index 0000000..447eab6
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeSVLoci/SSLOptions.hh
@@ -0,0 +1,43 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <string>
+#include "../../common/Program.hh"
+
+
+struct SSLOptions
+{
+    std::string graphFilename;    // --graph-file: sv locus graph file to summarize
+    bool isGlobalStats = false;   // --global: stats on the full graph instead of per-locus stats
+    std::string outputFilename;   // --output-file: report destination; the option help gives stdout as the default
+};
+
+
+void
+parseSSLOptions(
+    const illumina::Program& prog,   // forwarded to usage() when the command line is rejected
+    int argc, char* argv[],          // raw command line
+    SSLOptions& opt);                // receives the parsed option values
diff --git a/src/c++/lib/applications/SummarizeSVLoci/SummarizeSVLoci.cpp b/src/c++/lib/applications/SummarizeSVLoci/SummarizeSVLoci.cpp
new file mode 100644
index 0000000..c007ff3
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeSVLoci/SummarizeSVLoci.cpp
@@ -0,0 +1,66 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SummarizeSVLoci.hh"
+#include "SSLOptions.hh"
+
+#include "common/OutStream.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include <iostream>
+
+
+
+static
+void
+runSSL(const SSLOptions& opt)
+{
+    // read the full locus graph before the output stream is opened
+    SVLocusSet locusGraph;
+    locusGraph.load(opt.graphFilename.c_str());
+
+    OutStream reportFile(opt.outputFilename);
+    std::ostream& report(reportFile.getStream());
+
+    // per-locus stats are the default report
+    if (! opt.isGlobalStats)
+    {
+        locusGraph.dumpLocusStats(report);
+        return;
+    }
+    // --global: stats on the full graph
+    locusGraph.dumpStats(report);
+}
+
+
+
+void
+SummarizeSVLoci::
+runInternal(int argc, char* argv[]) const
+{
+    SSLOptions opt;
+    // parse the command line into opt, then write the graph summary
+    parseSSLOptions(*this,argc,argv,opt);
+    runSSL(opt);
+}
diff --git a/src/c++/lib/applications/SummarizeSVLoci/SummarizeSVLoci.hh b/src/c++/lib/applications/SummarizeSVLoci/SummarizeSVLoci.hh
new file mode 100644
index 0000000..6156a58
--- /dev/null
+++ b/src/c++/lib/applications/SummarizeSVLoci/SummarizeSVLoci.hh
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "common/Program.hh"
+
+
+/// write summary stats for an sv locus graph file, either per-locus (the default)
+/// or for the full graph (--global)
+struct SummarizeSVLoci : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "SummarizeSVLoci";
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;   // parses argv, then writes the summary; defined in SummarizeSVLoci.cpp
+};
diff --git a/src/c++/lib/applications/TestAssembler/CMakeLists.txt b/src/c++/lib/applications/TestAssembler/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/applications/TestAssembler/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/applications/TestAssembler/TestAssembler.cpp b/src/c++/lib/applications/TestAssembler/TestAssembler.cpp
new file mode 100644
index 0000000..fb982f4
--- /dev/null
+++ b/src/c++/lib/applications/TestAssembler/TestAssembler.cpp
@@ -0,0 +1,77 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "TestAssembler.hh"
+
+static
+void
+runTestAssembler(const TestAssemblerOptions& opt)
+{
+    // Assemble the reads of every input alignment file and write one '>Contig<index>' record per contig.
+    // check that we have write permission on the output file early:
+    {
+        OutStream outs(opt.outputFilename);
+    }
+
+    const ReadScannerOptions scanOpt;   // scanner and assembler options are both default-constructed here
+    const AssemblerOptions asmOpt;
+
+    AssemblyReadInput reads;
+    for (const std::string& file : opt.alignFileOpt.alignmentFilename)
+    {
+        log_os << "[INFO] Extracting reads from file: '" << file << "'\n";
+        extractAssemblyReadsFromBam(scanOpt, asmOpt, file.c_str(), reads);
+    }
+
+    AssemblyReadOutput readInfo;
+    Assembly contigs;
+
+    log_os << "[INFO] Assembling read input.\n";
+
+#ifdef ITERATIVE_ASSEMBLER
+    runIterativeAssembler(asmOpt, reads, readInfo, contigs);
+#else
+    runSmallAssembler(asmOpt, reads, readInfo, contigs);
+#endif
+
+    OutStream outs(opt.outputFilename);
+    std::ostream& os(outs.getStream());
+
+    const unsigned contigCount(contigs.size());
+    log_os << "[INFO] Assembly complete. Contig count: " << contigCount << "\n";
+    // header line with the contig index, followed by the contig sequence on its own line
+    for (unsigned contigIndex(0); contigIndex<contigCount; ++contigIndex)
+    {
+        os << ">Contig" << contigIndex << "\n";
+        os << contigs[contigIndex].seq << "\n";
+    }
+}
+
+
+
+void
+TestAssembler::
+runInternal(int argc, char* argv[]) const
+{
+    TestAssemblerOptions opt;
+    // parse the command line into opt, then extract reads, assemble and write the contigs
+    parseTestAssemblerOptions(*this,argc,argv,opt);
+    runTestAssembler(opt);
+}
diff --git a/src/c++/lib/applications/TestAssembler/TestAssembler.hh b/src/c++/lib/applications/TestAssembler/TestAssembler.hh
new file mode 100644
index 0000000..6368908
--- /dev/null
+++ b/src/c++/lib/applications/TestAssembler/TestAssembler.hh
@@ -0,0 +1,51 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <iostream>
+#include <cstdlib>
+
+#include "blt_util/log.hh"
+#include "common/OutStream.hh"
+#include "common/Program.hh"
+
+#include "assembly/IterativeAssembler.hh"
+#include "assembly/SmallAssembler.hh"
+#include "TestAssemblerOptions.hh"
+#include "extractAssemblyReads.hh"
+
+
+/// test front-end to run the manta assembler from command-line
+/// (reads are taken from the input alignment files, contigs are written to the output stream)
+struct TestAssembler : public illumina::Program
+{
+    const char*
+    name() const
+    {
+        return "TestAssembler";
+    }
+
+    void
+    runInternal(int argc, char* argv[]) const;   // parses argv, then runs the assembly; defined in TestAssembler.cpp
+};
diff --git a/src/c++/lib/applications/TestAssembler/TestAssemblerOptions.cpp b/src/c++/lib/applications/TestAssembler/TestAssemblerOptions.cpp
new file mode 100644
index 0000000..5ab18ca
--- /dev/null
+++ b/src/c++/lib/applications/TestAssembler/TestAssemblerOptions.cpp
@@ -0,0 +1,98 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#include "TestAssemblerOptions.hh"
+
+#include "blt_util/log.hh"
+#include "common/ProgramUtil.hh"
+#include "options/AlignmentFileOptionsParser.hh"
+#include "options/optionsUtil.hh"
+
+#include "boost/program_options.hpp"
+
+#include <iostream>
+
+
+
+static
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* msg = nullptr)   // optional extra message, passed through unchanged
+{   // forwards to the six-argument usage() with this tool's description and usage-line suffix
+    usage(os, prog, visible, "test manta assembler from command-line", " > contigs", msg);
+}
+
+
+
+void
+parseTestAssemblerOptions(
+    const illumina::Program& prog,
+    int argc, char* argv[],
+    TestAssemblerOptions& opt)
+{
+    // Fill opt from the command line; --help, an empty command line or a parse error end in usage().
+    namespace po = boost::program_options;
+    po::options_description req("configuration");
+    req.add_options()
+    ("output-file", po::value(&opt.outputFilename),
+     "write assembled contigs to filename (default: stdout)");
+
+    po::options_description help("help");
+    help.add_options()
+    ("help,h","print this message");
+
+    po::options_description aligndesc(getOptionsDescription(opt.alignFileOpt));
+
+    po::options_description visible("options");
+    visible.add(aligndesc).add(req).add(help);
+
+    bool po_parse_fail(false);
+    po::variables_map vm;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, visible,
+                                         po::command_line_style::unix_style ^ po::command_line_style::allow_short), vm);
+        po::notify(vm);
+    }
+    catch (const boost::program_options::error& e)     // todo:: find out what is the more specific exception class thrown by program options
+    {
+        log_os << "\nERROR: Exception thrown by option parser: " << e.what() << "\n";
+        po_parse_fail=true;
+    }
+
+    if ((argc<=1) || (vm.count("help")) || po_parse_fail)
+    {
+        // usage() is handed this tool's description and usage-line suffix, so no banner
+        // is written by hand here. The explicit exit is a safeguard: the shared usage()
+        // helper is defined outside this file and its return behaviour is not visible.
+        usage(log_os,prog,visible);
+        exit(EXIT_FAILURE);
+    }
+
+    std::string errorMsg;
+    if (parseOptions(vm, opt.alignFileOpt, errorMsg))
+    {
+        usage(log_os,prog,visible,errorMsg.c_str());
+    }
+}
diff --git a/src/c++/lib/applications/TestAssembler/TestAssemblerOptions.hh b/src/c++/lib/applications/TestAssembler/TestAssemblerOptions.hh
new file mode 100644
index 0000000..098df2d
--- /dev/null
+++ b/src/c++/lib/applications/TestAssembler/TestAssemblerOptions.hh
@@ -0,0 +1,39 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#pragma once
+
+#include "common/Program.hh"
+#include "options/AlignmentFileOptions.hh"
+
+
+struct TestAssemblerOptions
+{
+    AlignmentFileOptions alignFileOpt;   // input alignment file options; its alignmentFilename list is what gets assembled
+    std::string outputFilename;          // --output-file: contig destination; the option help gives stdout as the default
+};
+
+
+void
+parseTestAssemblerOptions(
+    const illumina::Program& prog,   // forwarded to usage() when the command line is rejected
+    int argc, char* argv[],          // raw command line
+    TestAssemblerOptions& opt);      // receives the parsed option values
diff --git a/src/c++/lib/applications/TestAssembler/extractAssemblyReads.cpp b/src/c++/lib/applications/TestAssembler/extractAssemblyReads.cpp
new file mode 100644
index 0000000..bf630de
--- /dev/null
+++ b/src/c++/lib/applications/TestAssembler/extractAssemblyReads.cpp
@@ -0,0 +1,86 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "extractAssemblyReads.hh"
+#include "htsapi/bam_streamer.hh"
+#include "manta/ShadowReadFinder.hh"
+#include "manta/SVLocusScanner.hh"
+
+
+
+void
+extractAssemblyReadsFromBam(
+    const ReadScannerOptions& scanOpt,
+    const AssemblerOptions& asmOpt,
+    const char* bamFile,
+    AssemblyReadInput& reads)
+{
+    // Append the (quality-masked) sequence of every retained read in bamFile to reads.
+    //
+    // Each record passes through the same sequence of checks: the core manta read
+    // filter, the non-strict supplement test, then the shadow-read finder.
+    bam_streamer bamStream(bamFile);
+    ShadowReadFinder shadowFinder(scanOpt.minSingletonMapqCandidates);
+
+    // bases with a quality below this value are masked to 'N'
+    const uint8_t minBaseQual(asmOpt.minQval);
+
+    while (bamStream.next())
+    {
+        const bam_record& record(*(bamStream.get_record_ptr()));
+
+        // filter out reads we ALWAYS filter out from manta
+        //
+        // don't filter out MAPQ0 because the split reads tend to have reduced mapping scores:
+        if (SVLocusScanner::isReadFilteredCore(record) || record.isNonStrictSupplement())
+        {
+            continue;
+        }
+
+        // the finder is consulted for every record which survives the filters above
+        const bool isShadowRead(shadowFinder.check(record));
+
+        // only keep unmapped shadows????
+        ///TODO --- is this appropriate for this tool?
+        if ((! isShadowRead) && record.is_unmapped())
+        {
+            continue;
+        }
+
+        // a shadow read is reverse-complemented when its mate is on the forward strand
+        const bool isRevComp(isShadowRead && record.is_mate_fwd_strand());
+
+        reads.push_back(record.get_bam_read().get_string());
+        std::string& readSeq(reads.back());
+
+        // should we recreate manta's fragmentation of reads at low-quality bases?
+        /// TODO --- is this appropriate for this tool?
+        const unsigned readLength(readSeq.size());
+        const uint8_t* baseQuals(record.qual());
+        for (unsigned pos(0); pos != readLength; ++pos)
+        {
+            if (baseQuals[pos] >= minBaseQual) continue;
+            readSeq[pos] = 'N';
+        }
+
+        // masking is applied in the read's stored orientation, reversal comes last
+        if (isRevComp) reverseCompStr(readSeq);
+    }
+}
diff --git a/src/c++/lib/applications/TestAssembler/extractAssemblyReads.hh b/src/c++/lib/applications/TestAssembler/extractAssemblyReads.hh
new file mode 100644
index 0000000..7546843
--- /dev/null
+++ b/src/c++/lib/applications/TestAssembler/extractAssemblyReads.hh
@@ -0,0 +1,45 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+#include "assembly/AssemblyReadInfo.hh"
+#include "options/ReadScannerOptions.hh"
+#include "options/SmallAssemblerOptions.hh"
+#include "options/IterativeAssemblerOptions.hh"
+
+//#define ITERATIVE_ASSEMBLER   // enable to select IterativeAssemblerOptions in the typedef below
+
+#ifdef ITERATIVE_ASSEMBLER
+typedef IterativeAssemblerOptions AssemblerOptions;
+#else
+typedef SmallAssemblerOptions AssemblerOptions;
+#endif
+
+
+/// load all reads from bam into assembly input structure with minimal
+/// filtration / input manipulation
+///
+void
+extractAssemblyReadsFromBam(
+    const ReadScannerOptions& scanOpt,   // minSingletonMapqCandidates configures the shadow read finder
+    const AssemblerOptions& asmOpt,      // minQval: bases with a lower quality are replaced by 'N'
+    const char* bamFile,                 // alignment file to stream reads from
+    AssemblyReadInput& reads);           // extracted read sequences are appended here
diff --git a/src/c++/lib/appstats/CMakeLists.txt b/src/c++/lib/appstats/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/appstats/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/appstats/GSCEdgeStats.cpp b/src/c++/lib/appstats/GSCEdgeStats.cpp
new file mode 100644
index 0000000..d62f9b8
--- /dev/null
+++ b/src/c++/lib/appstats/GSCEdgeStats.cpp
@@ -0,0 +1,191 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "GSCEdgeStats.hh"
+#include "blt_util/math_util.hh"
+
+#include "boost/archive/xml_iarchive.hpp"
+#include "boost/archive/xml_oarchive.hpp"
+
+#include <fstream>
+#include <iostream>
+
+
+
+void
+SimpleHist::
+report(std::ostream& os) const
+{
+    // one "<bin>\t<count>" row per bin; the index of the last bin gets a "+" suffix
+    const auto binCount(histdata.size());
+    for (unsigned bin(0); bin<binCount; ++bin)
+    {
+        os << bin << ((bin+1 == binCount) ? "+" : "") << "\t" << histdata[bin] << "\n";
+    }
+}
+
+
+
+static
+void
+reportTime(
+    const char* label,
+    const CpuTimes ltime,
+    const uint64_t edgeCount,
+    const uint64_t candCount,
+    std::ostream& os)
+{
+    // writes three label-prefixed rows: Hours, SecsPerEdge and SecsPerCand;
+    // the last two hand safeFrac(1,count) to CpuTimes::report as its first argument
+    os << label << "Hours\t";
+    ltime.reportHr(os);
+    os << "\n" << label << "SecsPerEdge\t";
+    ltime.report(safeFrac(1,edgeCount),"s",os);
+    os << "\n" << label << "SecsPerCand\t";
+    ltime.report(safeFrac(1,candCount),"s",os);
+    os << "\n";
+}
+
+
+#if 0
+void
+reportTime(
+    const char* label,
+    const double ltime,
+    const uint64_t edgeCount,
+    const uint64_t candCount,
+    std::ostream& os)
+{
+    static const double secPerHour(3600);
+    os << label << "Hours_SecsPerEdge_SecsPerCand\t" << ltime/secPerHour
+       << "\t" << safeFrac(ltime,edgeCount)
+       << "\t" << safeFrac(ltime,candCount)
+       << "\n";
+}
+#endif
+
+
+
+void
+GSCEdgeGroupStats::
+report(std::ostream& os) const
+{
+    // "nocat" rows: totalTime after CpuTimes::difference() against the merged cand/assembly/scoring timers
+    CpuTimes categorizedTime(candTime);
+    categorizedTime.merge(assemblyTime);
+    categorizedTime.merge(scoringTime);
+    CpuTimes uncategorizedTime(totalTime);
+    uncategorizedTime.difference(categorizedTime);
+    os << "InputEdgeCount\t" << totalInputEdgeCount << "\n"
+       << "InputEdgeCandidatesPerEdge:\n";
+    candidatesPerEdge.report(os);
+    os << "CandidateCount\t" << totalCandidateCount << "\n"
+       << "ComplexCandidateCount\t" << totalComplexCandidate << "\n";
+    finderStats.report(os);
+    os << "SpanningComplexCandidateFiltered\t" << totalSpanningCandidateFilter << "\n"
+       << "JunctionAssemblyOverlapSkipped\t" << totalJunctionAssemblyOverlapSkips << "\n"
+       << "JunctionCount\t" << totalJunctionCount << "\n"
+       << "ComplexJunctionCount\t" << totalComplexJunctionCount << "\n"
+       << "BreaksPerJunction:\n";
+    breaksPerJunction.report(os);
+    os << "TotalAssemblyCandidates\t" << totalAssemblyCandidates << "\n"
+       << "TotalSpanningAssemblyCandidates\t" << totalSpanningAssemblyCandidates << "\n"
+       << "AssemblyCandidatesPerJunction:\n";
+    assemblyCandidatesPerJunction.report(os);
+    reportTime("total",totalTime,totalInputEdgeCount,totalCandidateCount, os);
+    reportTime("candi",candTime,totalInputEdgeCount,totalCandidateCount, os);
+    reportTime("assem",assemblyTime,totalInputEdgeCount,totalCandidateCount, os);
+    reportTime("score",scoringTime,totalInputEdgeCount,totalCandidateCount, os);
+    reportTime("nocat",uncategorizedTime,totalInputEdgeCount,totalCandidateCount, os);
+}
+
+
+
+void
+GSCEdgeStatsData::
+report(std::ostream& os) const
+{
+    using namespace BOOST_TIMER_HELPER;
+    // the "[AllEdges]" section is a merged copy of the remote and self edge groups
+    GSCEdgeGroupStats allEdges(remoteEdges);
+    allEdges.merge(selfEdges);
+    // "NonEdgeHours": lifeTime after CpuTimes::difference() against the total edge time
+    CpuTimes nonEdgeTime(lifeTime);
+    nonEdgeTime.difference(allEdges.totalTime);
+    os << "SVGenTotalHours\t";
+    lifeTime.reportHr(os);
+    os << "\n" << "NonEdgeHours\t";
+    nonEdgeTime.reportHr(os);
+    os << "\n" << "\n[AllEdges]\n";
+    allEdges.report(os);
+    os << "\n[RemoteEdges]\n";
+    remoteEdges.report(os);
+    os << "\n[SelfEdges]\n";
+    selfEdges.report(os);
+}
+
+
+
+void
+GSCEdgeStats::
+load(const char* filename)
+{
+    assert(filename != nullptr);
+    std::ifstream xmlFile(filename);
+    boost::archive::xml_iarchive archive(xmlFile); // edgeData is read back from a boost XML archive
+    archive >> BOOST_SERIALIZATION_NVP(edgeData);
+}
+
+
+
+void
+GSCEdgeStats::
+save(std::ostream& os) const
+{
+    boost::archive::xml_oarchive archive(os); // edgeData is written to os as a boost XML archive
+    archive << BOOST_SERIALIZATION_NVP(edgeData);
+}
+
+
+
+void
+GSCEdgeStats::
+save(const char* filename) const
+{
+    assert(filename != nullptr);
+    std::ofstream xmlFile(filename);
+    save(xmlFile); // delegates to the std::ostream overload
+}
+
+
+
+void
+GSCEdgeStats::
+report(const char* filename) const
+{
+    assert(filename != nullptr);
+    std::ofstream reportFile(filename);
+    reportFile << "EdgeStatsReport\n"; // fixed first line of the report file
+    edgeData.report(reportFile);
+}
diff --git a/src/c++/lib/appstats/GSCEdgeStats.hh b/src/c++/lib/appstats/GSCEdgeStats.hh
new file mode 100644
index 0000000..225a4aa
--- /dev/null
+++ b/src/c++/lib/appstats/GSCEdgeStats.hh
@@ -0,0 +1,223 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "SVFinderStats.hh"
+#include "blt_util/time_util.hh"
+
+#include "boost/serialization/nvp.hpp"
+#include "boost/serialization/vector.hpp"
+
+#include <cassert>
+#include <cstdint>
+
+#include <iosfwd>
+#include <vector>
+
+
+struct SimpleHist
+{
+    explicit
+    SimpleHist(
+        const unsigned size)
+        : histdata(size,0)
+    {
+        assert(size != 0);
+    }
+
+    /// count one observation of val; any val past the last bin is counted in the last bin
+    void
+    increment(
+        const unsigned val)
+    {
+        const bool isOverflow(val >= histdata.size());
+        if (isOverflow) histdata.back()++;
+        else            histdata[val]++;
+    }
+
+    /// add the bin counts of rhs, which must have the same number of bins
+    void
+    merge(const SimpleHist& rhs)
+    {
+        assert(histdata.size() == rhs.histdata.size());
+        auto rhsBin(rhs.histdata.begin());
+        for (uint64_t& bin : histdata)
+        {
+            bin += *rhsBin++;
+        }
+    }
+
+    /// write one "<bin>\t<count>" line per bin to os
+    void
+    report(std::ostream& os) const;
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& BOOST_SERIALIZATION_NVP(histdata);
+    }
+    std::vector<uint64_t> histdata; ///< per-bin counts
+};
+
+BOOST_CLASS_IMPLEMENTATION(SimpleHist, boost::serialization::object_serializable)
+
+
+/// aggregate statistics (timers, counters and histograms) over a group of GSV edges
+struct GSCEdgeGroupStats
+{
+    GSCEdgeGroupStats()
+        : candidatesPerEdge(6),
+          assemblyCandidatesPerJunction(6),
+          breaksPerJunction(4)
+    {}
+
+    /// add every timer, counter and histogram of rhs to the matching member of this object
+    void
+    merge(const GSCEdgeGroupStats& rhs)
+    {
+        totalTime.merge(rhs.totalTime);
+        candTime.merge(rhs.candTime);
+        assemblyTime.merge(rhs.assemblyTime);
+        scoringTime.merge(rhs.scoringTime);
+        totalInputEdgeCount += rhs.totalInputEdgeCount;
+        totalCandidateCount += rhs.totalCandidateCount;
+        totalComplexCandidate += rhs.totalComplexCandidate;
+        totalSpanningCandidateFilter += rhs.totalSpanningCandidateFilter;
+        totalJunctionAssemblyOverlapSkips += rhs.totalJunctionAssemblyOverlapSkips;
+        totalJunctionCount += rhs.totalJunctionCount;
+        totalComplexJunctionCount += rhs.totalComplexJunctionCount;
+        totalAssemblyCandidates += rhs.totalAssemblyCandidates;
+        totalSpanningAssemblyCandidates += rhs.totalSpanningAssemblyCandidates;
+        candidatesPerEdge.merge(rhs.candidatesPerEdge);
+        assemblyCandidatesPerJunction.merge(rhs.assemblyCandidatesPerJunction);
+        breaksPerJunction.merge(rhs.breaksPerJunction);
+        finderStats.merge(rhs.finderStats);
+    }
+
+    void
+    report(std::ostream& os) const;
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& BOOST_SERIALIZATION_NVP(totalTime)
+        & BOOST_SERIALIZATION_NVP(candTime)
+        & BOOST_SERIALIZATION_NVP(assemblyTime)
+        & BOOST_SERIALIZATION_NVP(scoringTime)
+        & BOOST_SERIALIZATION_NVP(totalInputEdgeCount)
+        & BOOST_SERIALIZATION_NVP(totalCandidateCount)
+        & BOOST_SERIALIZATION_NVP(totalComplexCandidate)
+        & BOOST_SERIALIZATION_NVP(totalSpanningCandidateFilter)
+        & BOOST_SERIALIZATION_NVP(totalJunctionAssemblyOverlapSkips)
+        & BOOST_SERIALIZATION_NVP(totalJunctionCount)
+        & BOOST_SERIALIZATION_NVP(totalComplexJunctionCount)
+        & BOOST_SERIALIZATION_NVP(totalAssemblyCandidates)
+        & BOOST_SERIALIZATION_NVP(totalSpanningAssemblyCandidates)
+        & BOOST_SERIALIZATION_NVP(candidatesPerEdge)
+        & BOOST_SERIALIZATION_NVP(assemblyCandidatesPerJunction)
+        & BOOST_SERIALIZATION_NVP(breaksPerJunction)
+        & BOOST_SERIALIZATION_NVP(finderStats);
+    }
+
+    CpuTimes totalTime;
+    CpuTimes candTime;
+    CpuTimes assemblyTime;
+    CpuTimes scoringTime;
+    uint64_t totalInputEdgeCount = 0;
+    uint64_t totalCandidateCount = 0;
+    uint64_t totalComplexCandidate = 0;
+    uint64_t totalSpanningCandidateFilter = 0;
+    uint64_t totalJunctionAssemblyOverlapSkips = 0;
+    uint64_t totalJunctionCount = 0;
+    uint64_t totalComplexJunctionCount = 0;
+    uint64_t totalAssemblyCandidates = 0;
+    uint64_t totalSpanningAssemblyCandidates = 0;
+
+    SimpleHist candidatesPerEdge; ///< 6 bins, values >= 5 share the last bin
+    SimpleHist assemblyCandidatesPerJunction; ///< 6 bins, values >= 5 share the last bin
+    SimpleHist breaksPerJunction; ///< 4 bins, values >= 3 share the last bin
+
+    SVFinderStats finderStats;
+};
+
+BOOST_CLASS_IMPLEMENTATION(GSCEdgeGroupStats, boost::serialization::object_serializable)
+
+
+struct GSCEdgeStatsData
+{
+    GSCEdgeStatsData() {}
+
+    void
+    merge(const GSCEdgeStatsData& rhs) // forwards to merge() of lifeTime and of both edge groups
+    {
+        lifeTime.merge(rhs.lifeTime);
+        selfEdges.merge(rhs.selfEdges);
+        remoteEdges.merge(rhs.remoteEdges);
+    }
+
+    void
+    report(std::ostream& os) const;
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& BOOST_SERIALIZATION_NVP(lifeTime)
+        & BOOST_SERIALIZATION_NVP(selfEdges)
+        & BOOST_SERIALIZATION_NVP(remoteEdges);
+    }
+
+    CpuTimes lifeTime; ///< reported as "SVGenTotalHours"
+    GSCEdgeGroupStats selfEdges; ///< reported under "[SelfEdges]"
+    GSCEdgeGroupStats remoteEdges; ///< reported under "[RemoteEdges]"
+};
+
+BOOST_CLASS_IMPLEMENTATION(GSCEdgeStatsData, boost::serialization::object_serializable)
+
+
+
+struct GSCEdgeStats
+{
+    void
+    load(const char* filename); ///< read edgeData from a boost XML archive file
+
+    void
+    save(std::ostream& os) const; ///< write edgeData to os as a boost XML archive
+
+    void
+    save(const char* filename) const; ///< same as save(os), writing to the named file
+
+    void
+    report(const char* filename) const; ///< write the text report of edgeData to the named file
+
+    void
+    merge(const GSCEdgeStats& rhs)
+    {
+        edgeData.merge(rhs.edgeData);
+    }
+
+    GSCEdgeStatsData edgeData;
+};
+
+BOOST_CLASS_IMPLEMENTATION(GSCEdgeStats, boost::serialization::object_serializable)
diff --git a/src/c++/lib/appstats/SVFinderStats.cpp b/src/c++/lib/appstats/SVFinderStats.cpp
new file mode 100644
index 0000000..1854463
--- /dev/null
+++ b/src/c++/lib/appstats/SVFinderStats.cpp
@@ -0,0 +1,39 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "SVFinderStats.hh"
+#include <iostream>
+
+
+
+void
+SVFinderStats::
+report(std::ostream& os) const
+{
+    os << "EdgeFilter\t" << edgeFilter << "\n"
+       << "SemiMappedFilter\t" << semiMappedFilter << "\n"
+       << "ComplexLowCountFilter\t" << ComplexLowCountFilter << "\n"
+       << "ComplexLowSignalFilter\t" << ComplexLowSignalFilter << "\n"
+       << "UnmatchedReadPairFilter\t" << unmatchedReadPairFilter << "\n";
+}
diff --git a/src/c++/lib/appstats/SVFinderStats.hh b/src/c++/lib/appstats/SVFinderStats.hh
new file mode 100644
index 0000000..5ea482c
--- /dev/null
+++ b/src/c++/lib/appstats/SVFinderStats.hh
@@ -0,0 +1,72 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "boost/serialization/nvp.hpp"
+
+#include <cstdint>
+
+#include <iosfwd>
+
+
+struct SVFinderStats
+{
+    SVFinderStats() {}
+
+    void
+    merge(
+        const SVFinderStats& rhs)
+    {
+        edgeFilter += rhs.edgeFilter;
+        semiMappedFilter += rhs.semiMappedFilter;
+        ComplexLowCountFilter += rhs.ComplexLowCountFilter;
+        ComplexLowSignalFilter += rhs.ComplexLowSignalFilter;
+        unmatchedReadPairFilter += rhs.unmatchedReadPairFilter;
+    }
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& BOOST_SERIALIZATION_NVP(edgeFilter)
+        & BOOST_SERIALIZATION_NVP(semiMappedFilter)
+        & BOOST_SERIALIZATION_NVP(ComplexLowCountFilter)
+        & BOOST_SERIALIZATION_NVP(ComplexLowSignalFilter)
+        & BOOST_SERIALIZATION_NVP(unmatchedReadPairFilter)
+        ;
+    }
+
+    void
+    report(std::ostream& os) const; ///< writes one "<Name>\t<count>" line per counter
+
+    // counters: BOOST_SERIALIZATION_NVP stores each under its member name, so a rename changes the archive tags
+    uint64_t edgeFilter = 0;
+    uint64_t semiMappedFilter = 0;
+    uint64_t ComplexLowCountFilter = 0;
+    uint64_t ComplexLowSignalFilter = 0;
+    uint64_t unmatchedReadPairFilter = 0;
+};
+
+BOOST_CLASS_IMPLEMENTATION(SVFinderStats, boost::serialization::object_serializable)
+
diff --git a/src/c++/lib/assembly/AssembledContig.cpp b/src/c++/lib/assembly/AssembledContig.cpp
new file mode 100644
index 0000000..37edda5
--- /dev/null
+++ b/src/c++/lib/assembly/AssembledContig.cpp
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#include "AssembledContig.hh"
+#include "blt_util/seq_printer.hh"
+
+#include <iostream>
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const AssembledContig& contig)
+{
+    // summary line with size and seed read count, then the sequence via printSeq
+    os << "CONTIG size: " << contig.seq.size();
+    os << " seedCount: " << contig.seedReadCount;
+    os << " seq:\n";
+    printSeq(contig.seq,os);
+
+    return (os << "\n");
+}
diff --git a/src/c++/lib/assembly/AssembledContig.hh b/src/c++/lib/assembly/AssembledContig.hh
new file mode 100644
index 0000000..da16839
--- /dev/null
+++ b/src/c++/lib/assembly/AssembledContig.hh
@@ -0,0 +1,60 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include "blt_util/known_pos_range2.hh"
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+#include <set>
+
+
+/// \brief data pertaining to a de-novo assembly contig
+///
+/// stores for each contig the sequence and the number of reads
+/// containing its seeding k-mer
+///
+struct AssembledContig
+{
+    std::string seq; ///< contig sequence
+
+    // reads used for assembly of contig <read_no,mapping position to contig>
+    //std::map<std::string,int> contigReads;
+
+    unsigned seedReadCount = 0; ///< no of reads containing the seeding kmer
+
+    std::set<unsigned> supportReads; ///< read numbers of the reads supporting this contig
+    std::set<unsigned> rejectReads; ///< read numbers of the reads supporting a branch the contig did not take
+
+    known_pos_range2 conservativeRange; ///< subsection of the contig with conservative coverage
+};
+
+
+std::ostream& operator<<(std::ostream& os, const AssembledContig& contig);
+
+
+typedef std::vector<AssembledContig> Assembly;
+
diff --git a/src/c++/lib/assembly/AssemblyReadInfo.hh b/src/c++/lib/assembly/AssemblyReadInfo.hh
new file mode 100644
index 0000000..f068169
--- /dev/null
+++ b/src/c++/lib/assembly/AssemblyReadInfo.hh
@@ -0,0 +1,44 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include <vector>
+#include <string>
+
+
+/// information added to each read in the process of assembly
+///
+struct AssemblyReadInfo
+{
+    bool isUsed = false; ///< presumably true once the assembler has consumed the read (see isFiltered) -- TODO confirm
+    bool isFiltered = false; ///< if true, the read was 'used' but filtered out, so there is no meaningful contig id association
+    bool isPseudo = false; ///< if true, the read was an assembled contig
+    unsigned contigId = 0; ///< index of the contig that this read is used in
+};
+
+
+typedef std::vector<std::string> AssemblyReadInput;
+typedef std::vector<bool> AssemblyReadReversal;
+typedef std::vector<AssemblyReadInfo> AssemblyReadOutput;
diff --git a/src/c++/lib/assembly/CMakeLists.txt b/src/c++/lib/assembly/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/assembly/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/assembly/IterativeAssembler.cpp b/src/c++/lib/assembly/IterativeAssembler.cpp
new file mode 100644
index 0000000..9ed5d8a
--- /dev/null
+++ b/src/c++/lib/assembly/IterativeAssembler.cpp
@@ -0,0 +1,927 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+/// \author Ole Schulz-Trieglaff
+///
+
+
+#include "assembly/IterativeAssembler.hh"
+#include "blt_util/set_util.hh"
+
+#include "boost/foreach.hpp"
+
+#include <cassert>
+
+#include <algorithm>
+#include <unordered_map>
+#include <vector>
+
+
+// compile with this macro to get verbose output:
+//#define DEBUG_ASBL
+//#define DEBUG_WALK
+
+
+// stream used by DEBUG_ASBL:
+#ifdef DEBUG_ASBL
+#include "blt_util/log.hh"
+#include <iostream>
+
+static
+void print_unsignSet(const std::set<unsigned>& unsignSet)
+{
+    // debug helper: writes "[", then every element followed by ",", then "]\n"
+    // to log_os
+    log_os << "[";
+    for (auto elemIter(unsignSet.begin()); elemIter != unsignSet.end(); ++elemIter)
+        log_os << *elemIter << ",";
+    log_os << "]\n";
+}
+
+static
+void print_stringSet(const std::set<std::string>& strSet)
+{
+    // debug helper: writes "[", then every string followed by ",", then "]\n"
+    // to log_os
+    log_os << "[";
+    for (auto elemIter(strSet.begin()); elemIter != strSet.end(); ++elemIter)
+        log_os << *elemIter << ",";
+    log_os << "]\n";
+}
+#endif
+
+
+// maps kmers to positions in read
+typedef std::unordered_map<std::string,unsigned> str_uint_map_t;
+// maps kmers to support reads
+typedef std::unordered_map<std::string,std::set<unsigned> > str_set_uint_map_t;
+typedef std::unordered_map<std::string, std::pair<unsigned,unsigned> > str_pair_uint_map_t;
+
+
+
+/**
+ * Builds a copy of @p contig grown by the single character @p base.
+ *
+ * @return @p contig + @p base if @p isEnd is true, @p base + @p contig otherwise.
+ */
+static
+std::string
+addBase(
+    const std::string& contig,
+    const char base,
+    const bool isEnd)
+{
+    if (! isEnd) return base + contig;
+    return contig + base;
+}
+
+
+
+/**
+ * Extracts @p length characters from one end of @p contig (@p length must not exceed its size).
+ *
+ * @return The trailing characters if @p isEnd is true, the leading characters otherwise.
+ */
+static
+std::string
+getEnd(
+    const std::string& contig,
+    const unsigned length,
+    const bool isEnd)
+{
+    const unsigned contigSize(contig.size());
+    assert(length <= contigSize);
+
+    // a prefix starts at offset 0, a suffix at (size - length)
+    const unsigned startPos(isEnd ? (contigSize-length) : 0);
+    return contig.substr(startPos,length);
+}
+
+
+#if 0
+/// adapt Ole's function to represent word hash as a graph:
+///
+void
+wordHashToDot(
+    const str_uint_map_t& wordCount,
+    std::ostream& os)
+{
+    static const unsigned MIN_KMER_FREQ(1);
+
+    // kmer nodes with coverage higher than this get a different color
+    //static const int lowCovGraphVisThreshold(3);
+    static const unsigned lowCovGraphVisThreshold(MIN_KMER_FREQ);
+    static const std::string lowCovNodeColor("red");
+    static const std::string highCovNodeColor("green");
+
+    os << "graph {\n";
+    os << "node [ style = filled ];\n";
+    str_uint_map_t aliasH;
+    unsigned n(0);
+    for (const str_uint_map_t::value_type& val : wordCount)
+    {
+        const std::string& word(val.first);
+        const unsigned cov(val.second);
+        aliasH[word] = n++;
+        const std::string& color(cov>lowCovGraphVisThreshold ? highCovNodeColor : lowCovNodeColor);
+        os << word << "[label=\"cov" << cov << "\" color=" << color << "]\n";
+    }
+
+    // need to add edges here
+    static const bool isEnd(true);
+    for (const str_uint_map_t::value_type& val : wordCount)
+    {
+        const std::string& word(val.first);
+        const std::string tmp(getEnd(word,word.size()-1,isEnd));
+        for (const char symbol : alphabet)
+        {
+            const std::string newKey(addBase(tmp,symbol,isEnd));
+            if (wordCount.find(newKey) != wordCount.end())
+            {
+                os << aliasH[word] << " -- " << aliasH[newKey] << ";\n";
+            }
+        }
+    }
+    os << "}\n";
+}
+#endif
+
+
+
+/**
+ * Extends the seed contig (aka most frequent k-mer)
+ *
+ */
+static
+bool
+walk(const IterativeAssemblerOptions& opt,
+     const std::string& seed,
+     const unsigned wordLength,
+     const str_uint_map_t& wordCount,
+     const str_set_uint_map_t& wordReads,
+     const std::set<std::string>& repeatWords,
+     std::set<std::string>& unusedWords,
+     AssembledContig& contig)
+{
+    const str_uint_map_t::const_iterator wordCountEnd(wordCount.cend());
+    const str_set_uint_map_t::const_iterator wordReadsEnd(wordReads.cend());
+
+    // we start with the seed
+    str_set_uint_map_t::const_iterator wordReadsIter(wordReads.find(seed));
+    assert(wordReadsIter != wordReadsEnd);
+    contig.supportReads = wordReadsIter->second;
+    contig.seq = seed;
+    // collecting rejecting reads for the seed from the unselected branches
+    for (const char symbol : opt.alphabet)
+    {
+        // the seed itself
+        if (symbol == seed[wordLength-1]) continue;
+
+        // add rejecting reads from an unselected word/branch
+        const std::string tmpBack = getEnd(seed, wordLength-1, false);
+        const std::string newKey(addBase(tmpBack, symbol, true));
+#ifdef DEBUG_WALK
+        log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
+#endif
+
+        wordReadsIter= wordReads.find(newKey);
+        if (wordReadsIter == wordReadsEnd) continue;
+        const std::set<unsigned>& unselectedReads(wordReadsIter->second);
+#ifdef DEBUG_WALK
+        log_os << "Supporting reads for the backwards word : ";
+        print_unsignSet(unselectedReads);
+#endif
+
+        contig.rejectReads.insert(unselectedReads.begin(), unselectedReads.end());
+#ifdef DEBUG_WALK
+        log_os << "seed's rejecting reads : ";
+        print_unsignSet(contig.rejectReads);
+#endif
+    }
+
+    unusedWords.erase(seed);
+
+    if (repeatWords.find(seed) != repeatWords.end())
+    {
+#ifdef DEBUG_WALK
+        log_os << "The seed is a repeat word " << seed << ". Stop walk.\n";
+#endif
+        contig.conservativeRange.set_begin_pos(0);
+        contig.conservativeRange.set_end_pos(wordLength);
+        return true;
+    }
+
+    bool isRepeatFound(false);
+
+    // 0 => walk to the right, 1 => walk to the left
+    for (unsigned mode(0); mode<2; ++mode)
+    {
+        const bool isEnd(mode==0);
+        unsigned conservativeEndOffset(0);
+
+        while (true)
+        {
+            const std::string previousWord = getEnd(contig.seq, wordLength, isEnd);
+            const std::string trunk(getEnd(contig.seq, wordLength-1, isEnd));
+#ifdef DEBUG_WALK
+            log_os << "# current contig : " << contig.seq << " size : " << contig.seq.size() << "\n"
+                   << " getEnd : " << trunk << "\n";
+            log_os << "contig rejecting reads : ";
+            print_unsignSet(contig.rejectReads);
+            log_os << "contig supporting reads : ";
+            print_unsignSet(contig.supportReads);
+#endif
+
+            unsigned maxBaseCount(0);
+            unsigned maxSharedReadCount(0);
+            char maxBase(opt.alphabet[0]);
+            std::string maxWord;
+            std::set<unsigned> maxWordReads;
+            std::set<unsigned> maxSharedReads;
+            std::set<unsigned> previousWordReads;
+            std::set<unsigned> supportReads2Remove;
+            std::set<unsigned> rejectReads2Add;
+
+            for (const char symbol : opt.alphabet)
+            {
+                const std::string newKey(addBase(trunk, symbol, isEnd));
+#ifdef DEBUG_WALK
+                log_os << "Extending end : base " << symbol << " " << newKey << "\n";
+#endif
+                const str_uint_map_t::const_iterator wordCountIter(wordCount.find(newKey));
+                if (wordCountIter == wordCountEnd) continue;
+                const unsigned currWordCount(wordCountIter->second);
+
+                wordReadsIter= wordReads.find(newKey);
+                if (wordReadsIter == wordReadsEnd) continue;
+                const std::set<unsigned>& currWordReads(wordReadsIter->second);
+
+                // get the shared supporting reads between the contig and the current word
+                std::set<unsigned> sharedReads;
+                std::set_intersection(contig.supportReads.begin(), contig.supportReads.end(),
+                                      currWordReads.begin(), currWordReads.end(),
+                                      std::inserter(sharedReads, sharedReads.begin()));
+#ifdef DEBUG_WALK
+                log_os << "Word supporting reads : ";
+                print_unsignSet(currWordReads);
+                log_os << "Contig-word shared reads : ";
+                print_unsignSet(sharedReads);
+#endif
+
+                if (sharedReads.empty()) continue;
+
+                const unsigned sharedReadCount(sharedReads.size());
+                if (sharedReadCount > maxSharedReadCount)
+                {
+                    // the old shared reads support an unselected allele
+                    // remove them from the contig's supporting reads
+                    if (!maxSharedReads.empty())
+                        supportReads2Remove.insert(maxSharedReads.begin(), maxSharedReads.end());
+                    // the old supporting reads is for an unselected allele
+                    // they become rejecting reads for the currently selected allele
+                    if (!maxWordReads.empty())
+                        rejectReads2Add.insert(maxWordReads.begin(), maxWordReads.end());
+                    // new supporting reads for the currently selected allele
+                    maxWordReads = currWordReads;
+
+                    maxSharedReadCount = sharedReadCount;
+                    maxSharedReads = sharedReads;
+                    maxBaseCount = currWordCount;
+                    maxBase = symbol;
+                    maxWord = newKey;
+                }
+                else
+                {
+                    supportReads2Remove.insert(sharedReads.begin(), sharedReads.end());
+                    rejectReads2Add.insert(currWordReads.begin(), currWordReads.end());
+                }
+            }
+
+#ifdef DEBUG_WALK
+            log_os << "Winner is : " << maxBase << " with " << maxBaseCount << " occurrences." << "\n";
+#endif
+
+
+            if (maxBaseCount < opt.minCoverage)
+            {
+
+#ifdef DEBUG_WALK
+                log_os << "Coverage or error rate below threshold.\n"
+                       << "maxBaseCount : " << maxBaseCount << " minCoverage: " << opt.minCoverage << "\n";
+#endif
+
+                break;
+            }
+
+
+#ifdef DEBUG_WALK
+            log_os << "Adding base " << contig.seq << " " << maxBase << " " << mode << "\n";
+#endif
+
+            contig.seq = addBase(contig.seq, maxBase, isEnd);
+#ifdef DEBUG_WALK
+            log_os << "New contig : " << contig.seq << "\n";
+#endif
+
+            if ((conservativeEndOffset != 0) || (maxBaseCount < opt.minConservativeCoverage))
+                conservativeEndOffset += 1;
+#ifdef DEBUG_WALK
+            log_os << "conservative end offset : " << conservativeEndOffset << "\n";
+#endif
+
+            // TODO: can add threshold for the count or percentage of shared reads
+            {
+                // walk backwards for one step at a branching point
+                if (maxWordReads != previousWordReads)
+                {
+                    const char tmpSymbol = (isEnd? previousWord[0] : previousWord[wordLength-1]);
+                    for (const char symbol : opt.alphabet)
+                    {
+                        // the selected branch
+                        if (symbol == tmpSymbol) continue;
+
+                        // add rejecting reads from an unselected branch
+                        const std::string newKey(addBase(trunk, symbol, !isEnd));
+#ifdef DEBUG_WALK
+                        log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
+#endif
+                        wordReadsIter= wordReads.find(newKey);
+                        if (wordReadsIter == wordReadsEnd) continue;
+                        const std::set<unsigned>& backWordReads(wordReadsIter->second);
+#ifdef DEBUG_WALK
+                        log_os << "Supporting reads for the backwards word : ";
+                        print_unsignSet(backWordReads);
+#endif
+                        rejectReads2Add.insert(backWordReads.begin(), backWordReads.end());
+#ifdef DEBUG_WALK
+                        log_os << "rejectReads2Add upated : ";
+                        print_unsignSet(rejectReads2Add);
+#endif
+                    }
+                }
+                previousWordReads = maxWordReads;
+
+#ifdef DEBUG_WALK
+                log_os << "Adding rejecting reads " << "\n"
+                       << " Old : ";
+                print_unsignSet(contig.rejectReads);
+                log_os << " To be added : ";
+                print_unsignSet(rejectReads2Add);
+#endif
+                // update rejecting reads
+                // add reads that support the unselected allele
+                for (const unsigned rd : rejectReads2Add)
+                {
+                    contig.rejectReads.insert(rd);
+                }
+#ifdef DEBUG_WALK
+                log_os << " New : ";
+                print_unsignSet(contig.rejectReads);
+#endif
+
+#ifdef DEBUG_WALK
+                log_os << "Updating supporting reads " << "\n"
+                       << " Old : ";
+                print_unsignSet(contig.supportReads);
+                log_os << " To be added : ";
+                print_unsignSet(maxWordReads);
+#endif
+                // update supporting reads
+                // add reads that support the selected allel
+                for (const unsigned rd : maxWordReads)
+                {
+                    if (contig.rejectReads.find(rd) == contig.rejectReads.end())
+                        contig.supportReads.insert(rd);
+#ifdef DEBUG_WALK
+                    if (contig.rejectReads.find(rd) != contig.rejectReads.end())
+                        log_os << "  Excluding rejected " << rd << "\n";
+#endif
+                }
+
+#ifdef DEBUG_WALK
+                log_os << " To be removed : ";
+                print_unsignSet(supportReads2Remove);
+#endif
+                // remove reads that do NOT support the selected allel anymore
+                for (const unsigned rd : supportReads2Remove)
+                {
+                    contig.supportReads.erase(rd);
+                }
+#ifdef DEBUG_WALK
+                log_os << " New : ";
+                print_unsignSet(contig.supportReads);
+#endif
+            }
+
+            // remove the last word from the unused list, so it cannot be used as the seed in finding the next contig
+            unusedWords.erase(maxWord);
+            // stop walk in the current mode after seeing one repeat word
+            if (repeatWords.find(maxWord) != repeatWords.end())
+            {
+#ifdef DEBUG_WALK
+                log_os << "Seen a repeat word " << maxWord << ". Stop walk in the current mode " << mode << "\n";
+#endif
+                isRepeatFound = true;
+                break;
+            }
+        }
+
+        // set conservative coverage range for the contig
+        if (mode == 0)
+            contig.conservativeRange.set_end_pos(conservativeEndOffset);
+        else
+            contig.conservativeRange.set_begin_pos(conservativeEndOffset);
+
+#ifdef DEBUG_WALK
+        log_os << "mode change. Current mode " << mode << "\n";
+#endif
+    }
+
+    contig.conservativeRange.set_end_pos(contig.seq.size()-contig.conservativeRange.end_pos());
+
+    return isRepeatFound;
+}
+
+
+
+/// \brief count k-mers over all reads and record the reads supporting each k-mer
+///
+/// words containing 'N' are ignored; a pseudo read adds opt.minCoverage instead of 1
+static
+void
+getKmerCounts(
+    const IterativeAssemblerOptions& opt,
+    const AssemblyReadInput& reads,
+    AssemblyReadOutput& readInfo,
+    const unsigned wordLength,
+    str_uint_map_t& wordCount,
+    str_set_uint_map_t& wordSupportReads)
+{
+    const unsigned readCount(reads.size());
+
+    for (unsigned readIndex(0); readIndex!=readCount; ++readIndex)
+    {
+        const std::string& readSeq(reads[readIndex]);
+        const unsigned readSize(readSeq.size());
+
+        // a read shorter than the word length contributes no words:
+        if (wordLength > readSize) continue;
+
+        // distinct words of this read; a word occurring several times in
+        // the same read is only entered (and later counted) once
+        std::set<std::string> uniqueWords;
+        const unsigned lastStart(readSize-wordLength);
+        for (unsigned start(0); start<=lastStart; ++start)
+        {
+            const std::string kmer(readSeq.substr(start,wordLength));
+
+            // skip words with "N" (either directly from input alignment
+            // or marked due to low basecall quality):
+            const bool hasN(kmer.find('N') != std::string::npos);
+            if (! hasN) uniqueWords.insert(kmer);
+        }
+
+        // a pseudo read must have passed the coverage check with smaller
+        // kmers already, so it is weighted by minCoverage instead of 1
+        const AssemblyReadInfo& rinfo(readInfo[readIndex]);
+        const unsigned weight(rinfo.isPseudo ? opt.minCoverage : 1u);
+
+        for (const std::string& kmer : uniqueWords)
+        {
+            // add this read's contribution to the word's total count
+            wordCount[kmer] += weight;
+            // and record the read as supporting the word
+            wordSupportReads[kmer].insert(readIndex);
+        }
+    }
+}
+
+/// depth-first visit of \p word and its successors in the word graph; words found on a circle are added to \p repeatWords. Returns the next unused depth index.
+static
+unsigned
+searchRepeats(
+    const IterativeAssemblerOptions& opt,
+    const unsigned index,
+    const std::string& word,
+    str_pair_uint_map_t& wordIndices,
+    std::vector<std::string>& wordStack,
+    std::set<std::string>& repeatWords)
+{
+    // set the depth index (.first) and the lowlink (.second) of the current word to the smallest unused index
+    wordIndices[word] = std::pair<unsigned,unsigned>(index, index);
+    unsigned nextIndex = index + 1;
+    wordStack.push_back(word);
+
+    const std::string tmp(getEnd(word, word.size()-1, true));
+    for (const char symbol : opt.alphabet)
+    {
+        // candidate successor of the current word
+        const std::string nextWord(addBase(tmp, symbol, true));
+
+        // homopolymer: the word is its own successor, which makes it a repeat word by itself
+        if (word == nextWord)
+        {
+            repeatWords.insert(word);
+            continue;
+        }
+
+        // the successor word does not exist in the reads
+        if (wordIndices.count(nextWord) == 0) continue;
+
+        const unsigned nextWordIdx = wordIndices[nextWord].first;
+        if (nextWordIdx == 0)
+        {
+            // the successor word has not been visited (a depth index of 0 marks an unvisited word)
+            // recurse on it
+            nextIndex = searchRepeats(opt, nextIndex, nextWord, wordIndices, wordStack, repeatWords);
+            // update the current word's lowlink
+            const unsigned wordLowLink = wordIndices[word].second;
+            const unsigned nextWordLowLink = wordIndices[nextWord].second;
+            wordIndices[word].second = std::min(wordLowLink, nextWordLowLink);
+        }
+        else
+        {
+            const bool isContained(std::find(wordStack.begin(), wordStack.end(), nextWord) != wordStack.end());
+            if (isContained)
+            {
+                // the successor word is in stack and therefore in the current circle of words
+                // only update the current word's lowlink
+                const unsigned wordLowLink = wordIndices[word].second;
+                wordIndices[word].second = std::min(wordLowLink, nextWordIdx);
+            }
+        }
+    }
+
+    // the current word is a root node if its lowlink still equals its own depth index
+    if (wordIndices[word].second == index)
+    {
+        // exclude singletons: the root is on top of the stack, so no other word joined it
+        bool isSingleton(wordStack.back() == word);
+        if (isSingleton)
+        {
+            wordStack.pop_back();
+        }
+        // record identified repeat words (i.e. words in the current circle)
+        else
+        {
+            while (true)
+            {
+                const std::string repeatWd = wordStack.back();
+                repeatWords.insert(repeatWd);
+                wordStack.pop_back();
+
+                if (repeatWd == word) break;
+            }
+        }
+    }
+
+    return nextIndex;
+}
+
+
+static
+void
+getRepeatKmers(
+    const IterativeAssemblerOptions& opt,
+    const str_uint_map_t& wordCount,
+    std::set<std::string>& repeatWords)
+{
+    // every counted word starts out unvisited: (depth index, lowlink) = (0, 0)
+    str_pair_uint_map_t visitState;
+    for (const str_uint_map_t::value_type& countEntry : wordCount)
+    {
+        visitState[countEntry.first] = std::make_pair(0u, 0u);
+    }
+    // launch a search from each word that no earlier search has reached
+    std::vector<std::string> pendingWords;
+    unsigned nextIndex(1);
+    for (const str_pair_uint_map_t::value_type& stateEntry : visitState)
+    {
+        if (stateEntry.second.first != 0) continue;
+        const std::string seedWord(stateEntry.first);
+        nextIndex = searchRepeats(opt, nextIndex, seedWord, visitState, pendingWords, repeatWords);
+    }
+}
+
+
+/// build contigs from all usable seed kmers of the given word length; returns false if any walk hit a repeat word
+static
+bool
+buildContigs(
+    const IterativeAssemblerOptions& opt,
+    const AssemblyReadInput& reads,
+    AssemblyReadOutput& readInfo,
+    const unsigned wordLength,
+    Assembly& contigs)
+{
+#ifdef DEBUG_ASBL
+    static const std::string logtag("buildContigs: ");
+    log_os << logtag << "Building contigs with " << reads.size() << " reads.\n";
+#endif
+
+    contigs.clear();
+
+    // per-kmer occurrence count and per-kmer set of supporting reads
+    str_uint_map_t wordCount;
+    str_set_uint_map_t wordSupportReads;
+    getKmerCounts(opt, reads, readInfo, wordLength, wordCount, wordSupportReads);
+
+    // repeat kmers, i.e. kmers on a circle of the de bruijn graph
+    std::set<std::string> repeatWords;
+    getRepeatKmers(opt, wordCount, repeatWords);
+#ifdef DEBUG_ASBL
+    log_os << logtag << "Identified " << repeatWords.size() << " repeat words.\n";
+    print_stringSet(repeatWords);
+#endif
+
+    // kmers that may still seed a contig: everything with sufficient coverage
+    std::set<std::string> unusedWords;
+    for (const str_uint_map_t::value_type& countEntry : wordCount)
+    {
+        if (countEntry.second < opt.minCoverage) continue;
+        unusedWords.insert(countEntry.first);
+    }
+
+    // the assembly counts as failed as soon as any walk runs into a repeat
+    bool isAnyRepeatFound(false);
+    while (!unusedWords.empty())
+    {
+        // seed with the unused kmer of highest count (the first one in set
+        // order if several kmers tie)
+        std::string seedWord;
+        unsigned seedCount(0);
+        for (const std::string& word : unusedWords)
+        {
+            assert (wordCount.count(word) > 0);
+            const unsigned currCount(wordCount.at(word));
+            if (currCount <= seedCount) continue;
+            seedWord = word;
+            seedCount = currCount;
+        }
+
+        // solve for a best contig in the graph by a heuristic greedy maxflow-ish criteria
+        AssembledContig contig;
+        if (walk(opt, seedWord, wordLength, wordCount, wordSupportReads, repeatWords, unusedWords, contig))
+        {
+            isAnyRepeatFound = true;
+        }
+
+#ifdef DEBUG_ASBL
+        log_os << logtag << "Found one contig of length " << contig.seq.size()
+               << ", with supporting reads: ";
+        print_unsignSet(contig.supportReads);
+        log_os << ", with rejecting reads: ";
+        print_unsignSet(contig.rejectReads);
+        log_os << ". Contig seq: \n" << contig.seq << "\n";
+#endif
+
+        contigs.push_back(contig);
+    }
+
+    // the kmer tables are not needed past this point
+    wordCount.clear();
+    wordSupportReads.clear();
+
+    return (! isAnyRepeatFound);
+}
+
+/// greedily move up to opt.maxAssemblyCount contigs from the by-value working copy \p candidateContigs into \p finalContigs, marking the reads they use in \p readInfo
+static
+void
+selectContigs(
+    const IterativeAssemblerOptions& opt,
+    AssemblyReadOutput& readInfo,
+    const unsigned normalReadCount,
+    Assembly candidateContigs,
+    Assembly& finalContigs)
+{
+#ifdef DEBUG_ASBL
+    static const std::string logtag("selectContigs: ");
+    log_os << logtag << "Start selecting contigs to be returned.\n";
+#endif
+
+    finalContigs.clear();
+    unsigned finalContigCount(0);
+    // a set of reads that has been used to construct contigs, including pseudo ones.
+    std::set<unsigned> usedReads;
+    // a set of pseudo reads that has been used to construct contigs
+    std::set<unsigned> usedPseudoReads;
+
+    while ((candidateContigs.size() > 0) && (finalContigCount < opt.maxAssemblyCount))
+    {
+        // count unused reads that are not pseudo reads; stop once fewer than opt.minUnusedReads are left
+        const unsigned usedNormalReads = usedReads.size() - usedPseudoReads.size();
+        const unsigned unusedNormalReads = normalReadCount - usedNormalReads;
+#ifdef DEBUG_ASBL
+        log_os << logtag << "# of candidateContigs: " << candidateContigs.size() << "\n";
+        log_os << logtag << "# of unused normal reads: " << unusedNormalReads << "\n";
+#endif
+        if (unusedNormalReads < opt.minUnusedReads) return;
+
+        unsigned contigIndex(0);
+        std::set<unsigned> contigs2Remove;
+
+        AssembledContig selectedContig;
+        unsigned selectedContigIndex;
+        unsigned maxSupport(0);
+        unsigned maxLength(0);
+        for (const AssembledContig& contig : candidateContigs)
+        {
+            // identify new support reads that were not used for the previously identified contigs
+            std::set<unsigned> newSupportReads;
+            std::set_difference(contig.supportReads.begin(), contig.supportReads.end(),
+                                usedReads.begin(), usedReads.end(),
+                                std::inserter(newSupportReads, newSupportReads.end()));
+#ifdef DEBUG_ASBL
+            log_os << logtag << "Contig #" << contigIndex << " newSupportReads=";
+            print_unsignSet(newSupportReads);
+#endif
+
+            // count the number of new support reads that are not pseudo reads
+            unsigned newNormalSupport(0);
+            for (const unsigned rd : newSupportReads)
+            {
+                const AssemblyReadInfo& rinfo(readInfo[rd]);
+                if (!rinfo.isPseudo) newNormalSupport++;
+            }
+            if (newNormalSupport < opt.minSupportReads)
+            {
+#ifdef DEBUG_ASBL
+                log_os << logtag << "Contig #" << contigIndex << " to be skipped: too few non-pseudo support reads that has not been used for previously identified contigs.\n";
+#endif
+                contigs2Remove.insert(contigIndex);
+                contigIndex++;
+                continue;
+            }
+
+            // a better contig has either more support reads that were not used
+            // or the same number of supports but a longer sequence
+            const unsigned currNewSupport = newSupportReads.size();
+            const unsigned currContigLen = contig.seq.size();
+            bool isBetterContig((currNewSupport > maxSupport) ||
+                                ((currNewSupport == maxSupport) && (currContigLen > maxLength)));
+            if (isBetterContig)
+            {
+                selectedContig = contig;
+                selectedContigIndex = contigIndex;
+                maxSupport = currNewSupport;
+                maxLength = currContigLen;
+            }
+
+            contigIndex++;
+        }
+
+        // no remaining contig adds any new support read, selection is done.
+        if (maxSupport == 0) break;
+
+#ifdef DEBUG_ASBL
+        log_os << logtag << "Contig #" << selectedContigIndex << " selected.\n";
+#endif
+        // select one contig
+        finalContigs.push_back(selectedContig);
+
+        contigs2Remove.insert(selectedContigIndex);
+        // remove selected & failed contigs, highest index first so that the remaining indices stay valid
+        BOOST_REVERSE_FOREACH(const unsigned cix, contigs2Remove)
+        {
+            candidateContigs.erase(candidateContigs.begin()+cix);
+        }
+#ifdef DEBUG_ASBL
+        log_os << logtag << "Removed " << contigs2Remove.size() << " contigs.\n";
+#endif
+
+        // update the info about used reads
+        for (const unsigned rd : selectedContig.supportReads)
+        {
+            usedReads.insert(rd);
+            AssemblyReadInfo& rinfo(readInfo[rd]);
+            // read info record the ID of the very first contig that the read supports
+            // TODO: may need to record the IDs of all contigs that the read supports?
+            if (!rinfo.isUsed)
+            {
+                rinfo.isUsed = true;
+                rinfo.contigId = finalContigCount;
+            }
+            if (rinfo.isPseudo) usedPseudoReads.insert(rd);
+        }
+#ifdef DEBUG_ASBL
+        log_os << logtag << "Updated used reads: \n";
+        print_unsignSet(usedReads);
+#endif
+
+        finalContigCount++;
+    }
+}
+
+
+/// try increasing word lengths until buildContigs() reports no repeats; the contigs of a failed
+/// round are fed into the next round as pseudo reads. Final contigs are chosen by selectContigs().
+void
+runIterativeAssembler(
+    const IterativeAssemblerOptions& opt,
+    AssemblyReadInput& reads,
+    AssemblyReadOutput& readInfo,
+    Assembly& contigs)
+{
+    const unsigned normalReadCount(reads.size());
+#ifdef DEBUG_ASBL
+    static const std::string logtag("runIterativeAssembler: ");
+    log_os << logtag << "Starting assembly with " << normalReadCount << " reads.\n";
+#endif
+    assert(opt.alphabet.size()>1);
+    // start from one default-constructed info record per input read
+    readInfo.clear();
+    readInfo.resize(reads.size());
+    Assembly iterativeContigs;
+
+    for (unsigned wordLength(opt.minWordLength); wordLength<=opt.maxWordLength; wordLength+=opt.wordStepSize)
+    {
+#ifdef DEBUG_ASBL
+        log_os << logtag << "Try " << wordLength << "-mer.\n";
+#endif
+        if (buildContigs(opt, reads, readInfo, wordLength, iterativeContigs))
+        {
+#ifdef DEBUG_ASBL
+            log_os << logtag << "Assembly succeeded with " << wordLength << "-mer.\n";
+#endif
+            break;
+        }
+
+#ifdef DEBUG_ASBL
+        log_os << logtag << "Repeats encountered with " << wordLength << "-mer.\n";
+#endif
+        // remove pseudo reads from the previous iteration: they were appended at the end, so drop everything from the first one on
+        const unsigned readCount(reads.size());
+        for (unsigned readIndex(0); readIndex<readCount; ++readIndex)
+        {
+            AssemblyReadInfo& rinfo(readInfo[readIndex]);
+            if (rinfo.isPseudo)
+            {
+                reads.erase(reads.begin()+readIndex, reads.end());
+                readInfo.erase(readInfo.begin()+readIndex, readInfo.end());
+#ifdef DEBUG_ASBL
+                log_os << logtag << "Removed " << (readCount - readIndex) << " pseudo reads (from the previous iteration).\n";
+#endif
+                break;
+            }
+        }
+
+        unsigned addedCount(0);
+        // add contigs from the current iteration that are longer than the next word length as pseudo reads
+        for (const AssembledContig& contig : iterativeContigs)
+        {
+            if (contig.seq.size() > (wordLength+opt.wordStepSize))
+            {
+#ifdef DEBUG_ASBL
+                log_os << logtag << "Adding a contig as pseudo read: " << contig.seq << ".\n";
+#endif
+                reads.push_back(contig.seq);
+                AssemblyReadInfo rinfo;
+                rinfo.isPseudo = true;
+                readInfo.push_back(rinfo);
+
+                addedCount++;
+            }
+        }
+#ifdef DEBUG_ASBL
+        log_os << logtag << "Added " << addedCount << " pseudo reads.\n";
+#endif
+    }
+
+    // greedy selection of contigs to be returned; normalReadCount excludes the pseudo reads appended above
+    selectContigs(opt, readInfo, normalReadCount, iterativeContigs, contigs);
+
+#ifdef DEBUG_ASBL
+    log_os << logtag << "Selected " << contigs.size() << " contigs.\n";
+    unsigned index(1);
+    for (const AssembledContig& ctg : contigs)
+    {
+        log_os << logtag <<"Selected contig # " << index << ": " << ctg.seq << "\n";
+        log_os << logtag << "Contig supporting reads: ";
+        print_unsignSet(ctg.supportReads);
+        log_os << logtag << "Contig rejecting reads: ";
+        print_unsignSet(ctg.rejectReads);
+        index++;
+    }
+#endif
+}
+
diff --git a/src/c++/lib/assembly/IterativeAssembler.hh b/src/c++/lib/assembly/IterativeAssembler.hh
new file mode 100644
index 0000000..09af184
--- /dev/null
+++ b/src/c++/lib/assembly/IterativeAssembler.hh
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#pragma once
+
+#include "assembly/AssembledContig.hh"
+#include "assembly/AssemblyReadInfo.hh"
+#include "options/IterativeAssemblerOptions.hh"
+
+
+/// \brief run a de-bruijn graph assembler intended for small-scale allele discovery
+///
+/// the assembler iteratively builds multiple contigs through a range of word sizes
+///
+/// \param[in] opt assembly parameters
+/// \param[in,out] reads the set of reads to use for the assembly; contigs from intermediate word sizes may be appended to it as pseudo reads
+/// \param[out] assembledReadInfo for each read in 'reads', provide information on if and how it was assembled into a contig
+/// \param[out] contigs zero to many assembled contigs
+///
+void
+runIterativeAssembler(
+    const IterativeAssemblerOptions& opt,
+    AssemblyReadInput& reads,
+    AssemblyReadOutput& assembledReadInfo,
+    Assembly& contigs);
+
+
diff --git a/src/c++/lib/assembly/SmallAssembler.cpp b/src/c++/lib/assembly/SmallAssembler.cpp
new file mode 100644
index 0000000..00ed10a
--- /dev/null
+++ b/src/c++/lib/assembly/SmallAssembler.cpp
@@ -0,0 +1,734 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff and Xiaoyu Chen
+///
+
+
+#include "assembly/SmallAssembler.hh"
+#include "blt_util/set_util.hh"
+
+#include <cassert>
+
+#include <iterator>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+
+// compile with this macro to get verbose output:
+//#define DEBUG_ASBL
+
+
+// stream used by DEBUG_ASBL:
+#ifdef DEBUG_ASBL
+#include "blt_util/log.hh"
+#include <iostream>
+
+/// debug helper: write \p readSet to log_os as a comma separated list in square brackets, followed by a newline
+static
+void
+print_readSet(
+    const std::set<unsigned>& readSet)
+{
+    log_os << "[";
+    std::set<unsigned>::const_iterator it(readSet.begin());
+    const std::set<unsigned>::const_iterator itEnd(readSet.end());
+    if (it != itEnd)
+    {
+        log_os << *it;
+        for (++it; it != itEnd; ++it) log_os << "," << *it;
+    }
+    log_os << "]\n";
+}
+#endif
+
+
+// maps kmers to their occurrence counts
+typedef std::unordered_map<std::string,unsigned> str_uint_map_t;
+// maps kmers to the set of reads supporting them
+typedef std::unordered_map<std::string,std::set<unsigned> > str_set_uint_map_t;
+// unordered set of words
+typedef std::unordered_set<std::string> str_set_t;
+
+
+/**
+ * Returns a copy of @p contig extended by @p base: appended when @p isEnd is true, prepended otherwise.
+ */
+static
+std::string
+addBase(
+    const std::string& contig,
+    const char base,
+    const bool isEnd)
+{
+    std::string extended(contig);
+    if (isEnd) extended.push_back(base);
+    else       extended.insert(extended.begin(), base);
+    return extended;
+}
+
+
+
+/**
+ * Extracts @p length characters from one end of @p contig.
+ *
+ *	@param isEnd take the trailing characters if true, the leading ones otherwise
+ *	@return The requested suffix or prefix; @p length must not exceed the contig size.
+ */
+static
+std::string
+getEnd(
+    const std::string& contig,
+    const unsigned length,
+    const bool isEnd)
+{
+    const unsigned csize(contig.size());
+    assert(length <= csize);
+    // a prefix starts at offset 0, a suffix starts 'length' characters before the end
+    const unsigned offset(isEnd ? (csize-length) : 0);
+    return contig.substr(offset,length);
+}
+
+
+#if 0
+/// (disabled code) adapt Ole's function to write the word hash as an undirected graph in dot syntax:
+/// NOTE(review): nodes are declared by word but edges refer to the numeric aliases in aliasH -- reconcile before re-enabling
+void
+wordHashToDot(
+    const str_uint_map_t& wordCount,
+    std::ostream& os)
+{
+    static const unsigned MIN_KMER_FREQ(1);
+
+    // kmer nodes with coverage higher than this get a different color (green instead of red)
+    //static const int lowCovGraphVisThreshold(3);
+    static const unsigned lowCovGraphVisThreshold(MIN_KMER_FREQ);
+    static const std::string lowCovNodeColor("red");
+    static const std::string highCovNodeColor("green");
+
+    os << "graph {\n";
+    os << "node [ style = filled ];\n";
+    str_uint_map_t aliasH;
+    unsigned n(0);
+    for (const str_uint_map_t::value_type& val : wordCount)
+    {
+        const std::string& word(val.first);
+        const unsigned cov(val.second);
+        aliasH[word] = n++;
+        const std::string& color(cov>lowCovGraphVisThreshold ? highCovNodeColor : lowCovNodeColor);
+        os << word << "[label=\"cov" << cov << "\" color=" << color << "]\n";
+    }
+    // NOTE(review): 'alphabet' below is not declared in this function -- presumably opt.alphabet is needed; confirm before re-enabling
+    // one edge from each word to every counted word that extends its (word size - 1) suffix by one base
+    static const bool isEnd(true);
+    for (const str_uint_map_t::value_type& val : wordCount)
+    {
+        const std::string& word(val.first);
+        const std::string tmp(getEnd(word,word.size()-1,isEnd));
+        for (const char symbol : alphabet)
+        {
+            const std::string newKey(addBase(tmp,symbol,isEnd));
+            if (wordCount.find(newKey) != wordCount.end())
+            {
+                os << aliasH[word] << " -- " << aliasH[newKey] << ";\n";
+            }
+        }
+    }
+    os << "}\n";
+}
+#endif
+
+
+
+/**
+ * Extends the seed contig (aka most frequent k-mer)
+ *
+ */
+static
+void
+walk(const SmallAssemblerOptions& opt,
+     const std::string& seed,
+     const unsigned wordLength,
+     const str_uint_map_t& wordCount,
+     const str_set_uint_map_t& wordReads,
+     std::set<std::string>& seenEdgeBefore,
+     AssembledContig& contig)
+{
+    const str_uint_map_t::const_iterator wordCountEnd(wordCount.end());
+    const str_set_uint_map_t::const_iterator wordReadsEnd(wordReads.end());
+
+    // we start with the seed
+    str_set_uint_map_t::const_iterator wordReadsIter(wordReads.find(seed));
+    assert(wordReadsIter != wordReadsEnd);
+    contig.supportReads = wordReadsIter->second;
+    contig.seq = seed;
+
+    // collecting rejecting reads for the seed from the unselected branches
+    for (const char symbol : opt.alphabet)
+    {
+        // the seed itself
+        if (symbol == seed[wordLength-1]) continue;
+
+        // add rejecting reads from an unselected word/branch
+        const std::string tmpBack = getEnd(seed, wordLength-1, false);
+        const std::string newKey(addBase(tmpBack, symbol, true));
+#ifdef DEBUG_ASBL
+        log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
+#endif
+
+        wordReadsIter= wordReads.find(newKey);
+        if (wordReadsIter == wordReadsEnd) continue;
+        const std::set<unsigned>& unselectedReads(wordReadsIter->second);
+#ifdef DEBUG_ASBL
+        log_os << "Supporting reads for the backwards word : ";
+        print_readSet(unselectedReads);
+#endif
+
+        contig.rejectReads.insert(unselectedReads.begin(), unselectedReads.end());
+#ifdef DEBUG_ASBL
+        log_os << "seed's rejecting reads : ";
+        print_readSet(contig.rejectReads);
+#endif
+    }
+
+    seenEdgeBefore.clear();
+    seenEdgeBefore.insert(seed);
+
+    str_set_t seenVertexBefore;
+
+    // 0 => walk to the right, 1 => walk to the left
+    for (unsigned mode(0); mode<2; ++mode)
+    {
+        unsigned conservativeEndOffset(0);
+
+        const bool isEnd(mode==0);
+
+        while (true)
+        {
+            const std::string previousWord = getEnd(contig.seq, wordLength, isEnd);
+            const std::string trunk(getEnd(contig.seq, wordLength-1, isEnd));
+
+#ifdef DEBUG_ASBL
+            log_os << "# current contig : " << contig.seq << " size : " << contig.seq.size() << "\n"
+                   << " getEnd : " << trunk << "\n";
+            log_os << "contig rejecting reads : ";
+            print_readSet(contig.rejectReads);
+            log_os << "contig supporting reads : ";
+            print_readSet(contig.supportReads);
+#endif
+
+            if (seenVertexBefore.count(trunk))
+            {
+#ifdef DEBUG_ASBL
+                log_os << "Seen word " << trunk << " before on this walk, terminating" << "\n";
+#endif
+                break;
+            }
+
+            seenVertexBefore.insert(trunk);
+
+            unsigned maxBaseCount(0);
+            unsigned maxSharedReadCount(0);
+            char maxBase(opt.alphabet[0]);
+            std::set<unsigned> maxWordReads;
+            std::set<unsigned> maxSharedReads;
+            std::set<unsigned> previousWordReads;
+            std::set<unsigned> supportReads2Remove;
+            std::set<unsigned> rejectReads2Add;
+
+            for (const char symbol : opt.alphabet)
+            {
+                const std::string newKey(addBase(trunk, symbol, isEnd));
+#ifdef DEBUG_ASBL
+                log_os << "Extending end : base " << symbol << " " << newKey << "\n";
+#endif
+                const str_uint_map_t::const_iterator wordCountIter(wordCount.find(newKey));
+                if (wordCountIter == wordCountEnd) continue;
+                const unsigned currWordCount(wordCountIter->second);
+
+                wordReadsIter= wordReads.find(newKey);
+                if (wordReadsIter == wordReadsEnd) continue;
+                const std::set<unsigned>& currWordReads(wordReadsIter->second);
+
+                // get the shared supporting reads between the contig and the current word
+                std::set<unsigned> sharedReads;
+                std::set_intersection(contig.supportReads.begin(), contig.supportReads.end(),
+                                      currWordReads.begin(), currWordReads.end(),
+                                      std::inserter(sharedReads, sharedReads.begin()));
+#ifdef DEBUG_ASBL
+                log_os << "Word supporting reads : ";
+                print_readSet(currWordReads);
+                log_os << "Contig-word shared reads : ";
+                print_readSet(sharedReads);
+#endif
+
+                if (sharedReads.empty()) continue;
+
+                const unsigned sharedReadCount(sharedReads.size());
+                if (sharedReadCount > maxSharedReadCount)
+                {
+                    // the old shared reads support an unselected allele
+                    // remove them from the contig's supporting reads
+                    if (!maxSharedReads.empty())
+                        supportReads2Remove.insert(maxSharedReads.begin(), maxSharedReads.end());
+                    // the old supporting reads is for an unselected allele
+                    // they become rejecting reads for the currently selected allele
+                    if (!maxWordReads.empty())
+                        rejectReads2Add.insert(maxWordReads.begin(), maxWordReads.end());
+                    // new supporting reads for the currently selected allele
+                    maxWordReads = currWordReads;
+                    maxSharedReadCount = sharedReadCount;
+                    maxSharedReads = sharedReads;
+                    maxBaseCount = currWordCount;
+                    maxBase = symbol;
+                }
+                else
+                {
+                    supportReads2Remove.insert(sharedReads.begin(), sharedReads.end());
+                    rejectReads2Add.insert(currWordReads.begin(), currWordReads.end());
+                }
+            }
+
+#ifdef DEBUG_ASBL
+            log_os << "Winner is : " << maxBase << " with " << maxBaseCount << " occurrences." << "\n";
+#endif
+
+            if (maxBaseCount < opt.minCoverage)
+            {
+#ifdef DEBUG_ASBL
+                log_os << "Coverage or error rate below threshold.\n"
+                       << "maxBaseCount : " << maxBaseCount << " minCverage: " << opt.minCoverage << "\n";
+#endif
+                break;
+            }
+
+            /// double check that word exists in reads at least once:
+            if (maxBaseCount == 0) break;
+
+            {
+                const std::string newEdge(addBase(trunk, maxBase, isEnd));
+                seenEdgeBefore.insert(newEdge);
+            }
+
+#ifdef DEBUG_ASBL
+            log_os << "Adding base " << contig.seq << " " << maxBase << " " << mode << "\n";
+#endif
+            contig.seq = addBase(contig.seq, maxBase, isEnd);
+
+            if ((conservativeEndOffset != 0) || (maxBaseCount < opt.minConservativeCoverage))
+            {
+                conservativeEndOffset += 1;
+            }
+
+#ifdef DEBUG_ASBL
+            log_os << "New contig : " << contig.seq << "\n";
+#endif
+
+            // TODO: can add threshold for the count or percentage of shared reads
+            {
+                // walk backwards for one step at a branching point
+                if (maxWordReads != previousWordReads)
+                {
+                    const char tmpSymbol = (isEnd? previousWord[0] : previousWord[wordLength-1]);
+                    for (const char symbol : opt.alphabet)
+                    {
+                        // the selected branch
+                        if (symbol == tmpSymbol) continue;
+
+                        // add rejecting reads from an unselected branch
+                        const std::string newKey(addBase(trunk, symbol, !isEnd));
+#ifdef DEBUG_ASBL
+                        log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
+#endif
+                        wordReadsIter= wordReads.find(newKey);
+                        if (wordReadsIter == wordReadsEnd) continue;
+                        const std::set<unsigned>& backWordReads(wordReadsIter->second);
+#ifdef DEBUG_ASBL
+                        log_os << "Supporting reads for the backwards word : ";
+                        print_readSet(backWordReads);
+#endif
+                        rejectReads2Add.insert(backWordReads.begin(), backWordReads.end());
+#ifdef DEBUG_ASBL
+                        log_os << "rejectReads2Add upated : ";
+                        print_readSet(rejectReads2Add);
+#endif
+                    }
+                }
+                previousWordReads = maxWordReads;
+
+#ifdef DEBUG_ASBL
+                log_os << "Adding rejecting reads " << "\n"
+                       << " Old : ";
+                print_readSet(contig.rejectReads);
+                log_os << " To be added : ";
+                print_readSet(rejectReads2Add);
+#endif
+                // update rejecting reads
+                // add reads that support the unselected allele
+                for (const unsigned rd : rejectReads2Add)
+                {
+                    contig.rejectReads.insert(rd);
+                }
+#ifdef DEBUG_ASBL
+                log_os << " New : ";
+                print_readSet(contig.rejectReads);
+#endif
+
+#ifdef DEBUG_ASBL
+                log_os << "Updating supporting reads " << "\n"
+                       << " Old : ";
+                print_readSet(contig.supportReads);
+                log_os << " To be added : ";
+                print_readSet(maxWordReads);
+#endif
+                // update supporting reads
+                // add reads that support the selected allel
+                for (const unsigned rd : maxWordReads)
+                {
+                    if (contig.rejectReads.find(rd) == contig.rejectReads.end())
+                        contig.supportReads.insert(rd);
+#ifdef DEBUG_ASBL
+                    if (contig.rejectReads.find(rd) != contig.rejectReads.end())
+                        log_os << "  Excluding rejected " << rd << "\n";
+#endif
+                }
+
+#ifdef DEBUG_ASBL
+                log_os << " To be removed : ";
+                print_readSet(supportReads2Remove);
+#endif
+                // remove reads that do NOT support the selected allel anymore
+                for (const unsigned rd : supportReads2Remove)
+                {
+                    contig.supportReads.erase(rd);
+                }
+#ifdef DEBUG_ASBL
+                log_os << " New : ";
+                print_readSet(contig.supportReads);
+#endif
+            }
+        }
+
+        if (mode == 0)
+        {
+            contig.conservativeRange.set_end_pos(conservativeEndOffset);
+        }
+        else
+        {
+            contig.conservativeRange.set_begin_pos(conservativeEndOffset);
+        }
+
+#ifdef DEBUG_ASBL
+        log_os << "mode change. Current mode " << mode << "\n";
+#endif
+    }
+
+    contig.conservativeRange.set_end_pos(contig.seq.size()-contig.conservativeRange.end_pos());
+}
+
+
+
+/// \param isFindRepeatReads if true, record every read containing a repeated word in repeatReads instead of returning at the first one
+/// \return true if no processed read contains a repeated word of length wordLength, false otherwise
+static
+bool
+getKmerCounts(
+    const AssemblyReadInput& reads,
+    const AssemblyReadOutput& readInfo,
+    const unsigned wordLength,
+    const bool isFindRepeatReads,
+    std::vector<int>& repeatReads,
+    str_uint_map_t& wordCount,
+    str_set_uint_map_t& wordSupportReads,
+    std::vector<str_uint_map_t>& readWordOffsets)
+{
+    const unsigned readCount(reads.size());
+    repeatReads.clear();
+
+    for (unsigned readIndex(0); readIndex<readCount; ++readIndex)
+    {
+        const AssemblyReadInfo& rinfo(readInfo[readIndex]);
+
+        // skip reads used in a previous iteration
+        if (rinfo.isUsed) continue;
+
+        // sequence of the current read; the start offset of each of its kmers is stored in readWordOffset below
+        const std::string& seq(reads[readIndex]);
+        const unsigned readLen(seq.size());
+
+        // this read is unusable for assembly:
+        if (readLen < wordLength) continue;
+
+        str_uint_map_t& readWordOffset(readWordOffsets[readIndex]);
+
+        for (unsigned j(0); j<=(readLen-wordLength); ++j)
+        {
+            const std::string word(seq.substr(j,wordLength));
+
+            // filter words with "N" (either directly from input alignment
+            // or marked due to low basecall quality):
+            if (word.find('N') != std::string::npos) continue;
+
+            if (readWordOffset.find(word) != readWordOffset.end())
+            {
+#ifdef DEBUG_ASBL
+                log_os << __FUNCTION__ << ": word " << word << " repeated in read " << readIndex << "\n";
+#endif
+                if (isFindRepeatReads)
+                {
+                    repeatReads.push_back(readIndex);
+                    break;
+                }
+                else
+                {
+                    // try again with different k-mer size
+                    return false;
+                }
+            }
+
+            // record (0-indexed) start point for word in read
+            //cout << "Recording " << word << " at " << j << "\n";
+            readWordOffset[word]=j;
+        }
+
+        // total occurrences from this read (also reached, with a partial word set, after the repeat-read break above):
+        for (const str_uint_map_t::value_type& offset : readWordOffset)
+        {
+            wordCount[offset.first]++;
+            // record the supporting read
+            wordSupportReads[offset.first].insert(readIndex);
+        }
+    }
+
+    return (repeatReads.empty());
+}
+
+
+/// \brief try to assemble a single contig at the given word length; on success the contig is appended to contigs, its supporting reads are marked used, and true is returned
+static
+bool
+buildContigs(
+    const SmallAssemblerOptions& opt,
+    const bool isLastWord,
+    const AssemblyReadInput& reads,
+    AssemblyReadOutput& readInfo,
+    const unsigned wordLength,
+    Assembly& contigs,
+    unsigned& unusedReads)
+{
+    const unsigned readCount(reads.size());
+
+#ifdef DEBUG_ASBL
+    static const std::string logtag("buildContigs: ");
+    log_os << logtag << "In SVLocusAssembler::buildContig. word length=" << wordLength << " readCount: " << readCount << "\n";
+    for (unsigned readIndex(0); readIndex<readCount; ++readIndex)
+    {
+        log_os << "read #" << readIndex <<": " << reads[readIndex] << " used=" << readInfo[readIndex].isUsed << "\n";
+    }
+#endif
+
+    // a set of read hashes; each read hash stores the starting positions of all kmers in the read
+    std::vector<str_uint_map_t> readWordOffsets(readCount);
+    // counts the number of occurrences for each kmer in all reads
+    str_uint_map_t wordCount;
+    // records the supporting reads for each kmer
+    str_set_uint_map_t wordSupportReads;
+
+    std::vector<int> repeatReads;
+    const bool isGoodKmerCount(getKmerCounts(reads, readInfo, wordLength, isLastWord, repeatReads, wordCount, wordSupportReads, readWordOffsets));
+    if (! isGoodKmerCount)
+    {
+        if (isLastWord)
+        {
+            for (const int readIndex : repeatReads)
+            {
+                readInfo[readIndex].isUsed = true;
+                readInfo[readIndex].isFiltered = true;
+                unusedReads--;
+            }
+        }
+        return false;
+    }
+
+    // get the kmers corresponding to the highest count
+    std::set<std::string> maxWords;
+    {
+        unsigned maxWordCount(0);
+        for (const auto& val : wordCount)
+        {
+            if (val.second < maxWordCount) continue;
+            if (val.second > maxWordCount)
+            {
+                maxWords.clear();
+                maxWordCount = val.second;
+            }
+
+            maxWords.insert(val.first);
+        }
+
+        if (maxWordCount < opt.minCoverage)
+        {
+#ifdef DEBUG_ASBL
+            log_os << logtag << "Coverage too low : " << maxWordCount << " " << opt.minCoverage << "\n";
+#endif
+            return false;
+        }
+    }
+
+    // solve for a best contig in the graph by a heuristic greedy maxflow-ish criteria
+    AssembledContig contig;
+    std::string maxWord;
+    {
+        // consider multiple possible most frequent seeding k-mers to find the one associated with the longest contig:
+        //
+        std::set<std::string> seenEdgeBefore;   // records k-mers already encountered during extension
+
+        while (! maxWords.empty())
+        {
+            maxWord=(*maxWords.begin());
+            maxWords.erase(maxWords.begin());
+#ifdef DEBUG_ASBL
+            log_os << logtag << "Seeding kmer : " << maxWord << "\n";
+#endif
+
+            AssembledContig newContig;
+            walk(opt, maxWord, wordLength, wordCount, wordSupportReads, seenEdgeBefore, newContig);
+
+            if (newContig.seq.size() > contig.seq.size())
+            {
+                contig = newContig;
+            }
+
+            // subtract seenEdgeBefore from maxWords
+            inplaceSetSubtract(seenEdgeBefore,maxWords);
+        }
+
+        // done with this now
+        wordCount.clear();
+    }
+
+#ifdef DEBUG_ASBL
+    log_os << logtag << "First pass assembly resulted in "
+           << contig.seq << "\n"
+           << " with length " << contig.seq.size() << ". Input consisted of " << readCount << " reads.\n"
+           << "Final supporting reads: ";
+    print_readSet(contig.supportReads);
+    log_os << "Final rejecting reads: ";
+    print_readSet(contig.rejectReads);
+#endif
+
+    // WHY CHECK THIS AFTER WALK???
+    // increment number of reads containing the seeding kmer
+    // NOTE: maxWord is the last seed tried above, not necessarily the seed of the contig that was kept
+    // TODO isn't this equal to maxWordCount? Do we need to sum it here?
+    for (unsigned readIndex(0); readIndex<readCount; ++readIndex)
+    {
+        const str_uint_map_t& readWordOffset(readWordOffsets[readIndex]);
+        if (readWordOffset.count(maxWord)) ++contig.seedReadCount;
+    }
+
+#ifdef DEBUG_ASBL
+    log_os << logtag << "final seeding reading count: " << contig.seedReadCount << "\n";
+#endif
+    if (contig.seedReadCount < opt.minSeedReads)
+    {
+#ifdef DEBUG_ASBL
+        log_os << "\t...which is below minSeedReadCount of " << opt.minSeedReads << " discarding.\n";
+#endif
+        return false;
+    }
+
+    // finally -- set isUsed and decrement unusedReads
+    for (unsigned readIndex(0); readIndex<readCount; ++readIndex)
+    {
+        AssemblyReadInfo& rinfo(readInfo[readIndex]);
+        if (rinfo.isUsed) continue;
+
+        if (contig.supportReads.find(readIndex) != contig.supportReads.end())
+        {
+            rinfo.isUsed = true;
+            rinfo.contigId = contigs.size();
+
+            assert(unusedReads != 0);
+            --unusedReads;
+        }
+    }
+
+    // don't need this anymore:
+    readWordOffsets.clear();
+
+    contigs.push_back(contig);
+    return true;
+}
+
+
+/// \brief build contigs iteratively, trying increasing word lengths per iteration, until too few unused reads remain, an iteration leaves the unused read count unchanged, or opt.maxAssemblyIterations is reached
+void
+runSmallAssembler(
+    const SmallAssemblerOptions& opt,
+    const AssemblyReadInput& reads,
+    AssemblyReadOutput& assembledReadInfo,
+    Assembly& contigs)
+{
+#ifdef DEBUG_ASBL
+    static const std::string logtag("runSmallAssembler: ");
+    log_os << logtag << "Starting assembly with " << reads.size() << " reads.\n";
+#endif
+    assert(opt.alphabet.size()>1);
+
+    assembledReadInfo.clear();
+    contigs.clear();
+
+    assembledReadInfo.resize(reads.size());
+
+    unsigned unusedReads(reads.size());
+
+    for (unsigned iteration(0); iteration < opt.maxAssemblyIterations; ++iteration)
+    {
+        if (unusedReads < opt.minSeedReads) return;
+
+        const unsigned lastUnusedReads(unusedReads);
+        for (unsigned wordLength(opt.minWordLength); wordLength<=opt.maxWordLength; wordLength+=opt.wordStepSize)
+        {
+            const bool isLastWord(wordLength+opt.wordStepSize > opt.maxWordLength);
+            const bool isAssemblySuccess = buildContigs(opt, isLastWord, reads, assembledReadInfo, wordLength, contigs, unusedReads);
+            if (isAssemblySuccess) break;
+        }
+
+#ifdef DEBUG_ASBL
+        log_os << logtag << "iter: " << iteration << " unused readMap now: " << unusedReads << "\n";
+#endif
+
+        // stop if no change in number of unused reads
+        if (unusedReads == lastUnusedReads)
+        {
+#ifdef DEBUG_ASBL
+            log_os << logtag << "Number of unused reads (" << unusedReads << ") did not change in this iteration. Stopping.\n";
+#endif
+            return;
+        }
+    }
+#ifdef DEBUG_ASBL
+    log_os << logtag << "Reached max number of assembly iterations: " << opt.maxAssemblyIterations << "\n";
+#endif
+}
+
diff --git a/src/c++/lib/assembly/SmallAssembler.hh b/src/c++/lib/assembly/SmallAssembler.hh
new file mode 100644
index 0000000..ea09ba1
--- /dev/null
+++ b/src/c++/lib/assembly/SmallAssembler.hh
@@ -0,0 +1,47 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include "assembly/AssembledContig.hh"
+#include "assembly/AssemblyReadInfo.hh"
+#include "options/SmallAssemblerOptions.hh"
+
+
+/// \brief run a de Bruijn graph assembler intended for small-scale allele discovery
+///
+/// the assembler iteratively builds multiple contigs through a range of word sizes
+/// any previous content of assembledReadInfo and contigs is cleared before assembly starts
+/// \param[in] opt assembly parameters
+/// \param[in] reads the set of reads to use for the assembly
+/// \param[out] assembledReadInfo for each read in 'reads', provide information on if and how it was assembled into a contig
+/// \param[out] contigs zero to many assembled contigs
+///
+void
+runSmallAssembler(
+    const SmallAssemblerOptions& opt,
+    const AssemblyReadInput& reads,
+    AssemblyReadOutput& assembledReadInfo,
+    Assembly& contigs);
+
diff --git a/src/c++/lib/assembly/test/CMakeLists.txt b/src/c++/lib/assembly/test/CMakeLists.txt
new file mode 100644
index 0000000..ebb21c8
--- /dev/null
+++ b/src/c++/lib/assembly/test/CMakeLists.txt
@@ -0,0 +1,29 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+set(ADDITIONAL_UNITTEST_LIB manta_blt_util)
+include(${THIS_CXX_TEST_LIBRARY_CMAKE})
diff --git a/src/c++/lib/assembly/test/IterativeAssemblerTest.cpp b/src/c++/lib/assembly/test/IterativeAssemblerTest.cpp
new file mode 100644
index 0000000..1fa49bf
--- /dev/null
+++ b/src/c++/lib/assembly/test/IterativeAssemblerTest.cpp
@@ -0,0 +1,133 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "IterativeAssembler.cpp"
+
+
+BOOST_AUTO_TEST_SUITE( test_IterativeAssembler )
+
+
+BOOST_AUTO_TEST_CASE( test_CircleDetector )
+{
+    IterativeAssemblerOptions assembleOpt;
+    str_uint_map_t wordCount;
+    std::set<std::string> repeatWords;
+
+    wordCount["TACCA"] = 3;
+    wordCount["CCACC"] = 3;
+    wordCount["CACCA"] = 3;
+    wordCount["ACCAC"] = 3;
+    wordCount["CCACA"] = 3;
+    wordCount["CACAC"] = 3;
+    wordCount["ACACA"] = 3;
+    wordCount["AAAAA"] = 2;
+
+    getRepeatKmers(assembleOpt, wordCount, repeatWords);
+
+    // the first circle: ACCAC -> CCACC -> CACCA -> ACCAC (consecutive words overlap by 4 bases)
+    BOOST_REQUIRE_EQUAL(repeatWords.count("ACCAC"), 1u);
+    BOOST_REQUIRE_EQUAL(repeatWords.count("CACCA"), 1u);
+    BOOST_REQUIRE_EQUAL(repeatWords.count("CCACC"), 1u);
+
+    BOOST_REQUIRE_EQUAL(repeatWords.count("TACCA"), 0u);
+    BOOST_REQUIRE_EQUAL(repeatWords.count("CCACA"), 0u);
+
+    // the second circle: CACAC -> ACACA -> CACAC
+    BOOST_REQUIRE_EQUAL(repeatWords.count("CACAC"), 1u);
+    BOOST_REQUIRE_EQUAL(repeatWords.count("ACACA"), 1u);
+
+    // homopolymer: self-circle
+    BOOST_REQUIRE_EQUAL(repeatWords.count("AAAAA"), 1u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_BasicAssembler )
+{
+    // test simple assembly at a single word size (6); the non-nucleotide read (index 4) must stay unused:
+    IterativeAssemblerOptions assembleOpt;
+
+    assembleOpt.minWordLength = 6;
+    assembleOpt.maxWordLength = 6;
+    assembleOpt.minCoverage = 2;
+
+    AssemblyReadInput reads;
+
+    reads.push_back("ACGTGTATTACC");
+    reads.push_back(  "GTGTATTACCTA");
+    reads.push_back(      "ATTACCTAGTAC");
+    reads.push_back(        "TACCTAGTACTC");
+    reads.push_back("123456789123");
+
+    AssemblyReadOutput readInfo;
+    Assembly contigs;
+
+    runIterativeAssembler(assembleOpt, reads, readInfo, contigs);
+
+    BOOST_REQUIRE_EQUAL(contigs.size(),1u);
+    BOOST_REQUIRE_EQUAL(contigs[0].seq,"GTGTATTACCTAGTAC");
+    for (unsigned i(0); i<4; ++i)
+    {
+        BOOST_REQUIRE(readInfo[i].isUsed);
+        BOOST_REQUIRE_EQUAL(readInfo[i].contigId,0u);
+    }
+    BOOST_REQUIRE(! readInfo[4].isUsed);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_IterativeKmer )
+{
+    // test assembly over a range of word sizes (3 to 9 in steps of 2); the non-nucleotide read (index 2) must stay unused:
+    IterativeAssemblerOptions assembleOpt;
+
+    assembleOpt.minWordLength = 3;
+    assembleOpt.maxWordLength = 9;
+    assembleOpt.wordStepSize = 2;
+    assembleOpt.minCoverage = 1;
+
+    AssemblyReadInput reads;
+
+    reads.push_back("ACACACACGATG");
+    reads.push_back(        "GATGTCTCTCTC");
+    reads.push_back("123456789123");
+
+    AssemblyReadOutput readInfo;
+    Assembly contigs;
+
+    runIterativeAssembler(assembleOpt, reads, readInfo, contigs);
+
+    BOOST_REQUIRE_EQUAL(contigs.size(),1u);
+    BOOST_REQUIRE_EQUAL(contigs[0].seq,"ACACACACGATGTCTCTCTC");
+    for (unsigned i(0); i<2; ++i)
+    {
+        BOOST_REQUIRE(readInfo[i].isUsed);
+        BOOST_REQUIRE_EQUAL(readInfo[i].contigId,0u);
+    }
+    BOOST_REQUIRE(! readInfo[2].isUsed);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/assembly/test/SmallAssemblerTest.cpp b/src/c++/lib/assembly/test/SmallAssemblerTest.cpp
new file mode 100644
index 0000000..ad9e75a
--- /dev/null
+++ b/src/c++/lib/assembly/test/SmallAssemblerTest.cpp
@@ -0,0 +1,158 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "SmallAssembler.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_SmallAssembler )
+
+
+BOOST_AUTO_TEST_CASE( test_SmallAssembler1 )
+{
+    // test simple assembly at a single word size (6); the non-nucleotide read (index 4) must stay unused:
+
+    SmallAssemblerOptions assembleOpt;
+
+    assembleOpt.minWordLength = 6;
+    assembleOpt.maxWordLength = 6;
+    assembleOpt.minCoverage = 2;
+    assembleOpt.minSeedReads = 3;
+
+    AssemblyReadInput reads;
+
+    reads.push_back("ACGTGTATTACC");
+    reads.push_back(  "GTGTATTACCTA");
+    reads.push_back(      "ATTACCTAGTAC");
+    reads.push_back(        "TACCTAGTACTC");
+    reads.push_back("123456789123");
+
+    AssemblyReadOutput readInfo;
+    Assembly contigs;
+
+    runSmallAssembler(assembleOpt, reads, readInfo, contigs);
+
+    BOOST_REQUIRE_EQUAL(contigs.size(),1u);
+    BOOST_REQUIRE_EQUAL(contigs[0].seq,"GTGTATTACCTAGTAC");
+    for (unsigned i(0); i<4; ++i)
+    {
+        BOOST_REQUIRE(readInfo[i].isUsed);
+        BOOST_REQUIRE_EQUAL(readInfo[i].contigId,0u);
+    }
+    BOOST_REQUIRE(! readInfo[4].isUsed);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_PoisonRead )
+{
+    // test against observed case where a single bad read could kill the whole assembly: the poly-A read (index 4) must end up marked as used while the other four reads still assemble
+
+    SmallAssemblerOptions assembleOpt;
+
+    assembleOpt.minWordLength = 6;
+    assembleOpt.maxWordLength = 6;
+    assembleOpt.minCoverage = 2;
+    assembleOpt.minSeedReads = 3;
+
+    AssemblyReadInput reads;
+
+    reads.push_back("ACGTGTATTACC");
+    reads.push_back(  "GTGTATTACCTA");
+    reads.push_back(      "ATTACCTAGTAC");
+    reads.push_back(        "TACCTAGTACTC");
+    reads.push_back("AAAAAAAAAAAAAAAAAAAA");
+
+    AssemblyReadOutput readInfo;
+    Assembly contigs;
+
+    runSmallAssembler(assembleOpt, reads, readInfo, contigs);
+
+    BOOST_REQUIRE_EQUAL(contigs.size(),1u);
+    BOOST_REQUIRE_EQUAL(contigs[0].seq,"GTGTATTACCTAGTAC");
+    for (unsigned i(0); i<4; ++i)
+    {
+        BOOST_REQUIRE(readInfo[i].isUsed);
+        BOOST_REQUIRE_EQUAL(readInfo[i].contigId,0u);
+    }
+    BOOST_REQUIRE(readInfo[4].isUsed);
+    BOOST_REQUIRE_EQUAL(readInfo[4].contigId,0u);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_supportingReadConsistency )
+{
+    // test that two read groups sharing the word ACGTG but differing in flanking sequence yield two separate contigs, each group assigned to its own contig
+
+    SmallAssemblerOptions assembleOpt;
+
+    assembleOpt.minWordLength = 6;
+    assembleOpt.maxWordLength = 6;
+    assembleOpt.minCoverage = 2;
+    assembleOpt.minSeedReads = 3;
+
+    AssemblyReadInput reads;
+    reads.push_back(        "AAACGTGTATTA");
+    reads.push_back(          "ACGTGTATTACC");
+    reads.push_back(           "CGTGTATTACCT");
+    reads.push_back(            "GTGTATTACCTA");
+    reads.push_back(                "ATTACCTAGTAC");
+    reads.push_back(                  "TACCTAGTACTC");
+    // the above reads build a contig ACGTG TATTACC TAGTAC
+    //
+    // Notice ACGTG should not be extended by adding 'A' to the left => AACGTG
+    // using the reads below, because they have a different suffix after ACGTG *GCC*
+    // Instead, the reads below build a contig CTTA GCTA ACGTG GCC
+    reads.push_back("CCCTTAGCTAAC");
+    reads.push_back(  "CTTAGCTAACGT");
+    reads.push_back(    "TAGCTAACGTGG");
+    reads.push_back(      "GCTAACGTGGCC");
+    reads.push_back(         "AACGTGGCCTAG");
+
+
+    AssemblyReadOutput readInfo;
+    Assembly contigs;
+
+    runSmallAssembler(assembleOpt, reads, readInfo, contigs);
+
+    BOOST_REQUIRE_EQUAL(contigs.size(),2u);
+    BOOST_REQUIRE_EQUAL(contigs[0].seq,"AACGTGTATTACCTAGTAC");
+    BOOST_REQUIRE_EQUAL(contigs[1].seq,"CTTAGCTAACGTGGCC");
+    for (unsigned i(0); i<6; ++i)
+    {
+        BOOST_REQUIRE(readInfo[i].isUsed);
+        BOOST_REQUIRE_EQUAL(readInfo[i].contigId,0u);
+    }
+
+    for (unsigned i(6); i<11; ++i)
+    {
+        BOOST_REQUIRE(readInfo[i].isUsed);
+        BOOST_REQUIRE_EQUAL(readInfo[i].contigId,1u);
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/assembly/test/test_main.cpp b/src/c++/lib/assembly/test/test_main.cpp
new file mode 100644
index 0000000..86aafa5
--- /dev/null
+++ b/src/c++/lib/assembly/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libassembly
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/c++/lib/blt_util/CMakeLists.txt b/src/c++/lib/blt_util/CMakeLists.txt
new file mode 100644
index 0000000..9511b67
--- /dev/null
+++ b/src/c++/lib/blt_util/CMakeLists.txt
@@ -0,0 +1,28 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+###############################################################################
+##
+## Configuration file for the c++/blt_util subfolder
+##
+## author Come Raczy
+##
+################################################################################
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/blt_util/CircularCounter.hh b/src/c++/lib/blt_util/CircularCounter.hh
new file mode 100644
index 0000000..e742f24
--- /dev/null
+++ b/src/c++/lib/blt_util/CircularCounter.hh
@@ -0,0 +1,129 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cassert>
+
+#include <vector>
+
+
+/// A circular buffer of true/false observations with fixed size, S
+///
+/// - push() stores one observation at the head position, which wraps
+///   around to the start of the buffer after S pushes
+/// - count() is the number of true values currently stored, it is
+///   tracked incrementally so the query is an O(1) operation
+struct CircularCounter
+{
+    CircularCounter(
+        const unsigned initSize) :
+        _count(0),
+        _headPos(0),
+        _dataSize(0),
+        _maxCount(0),
+        _data(initSize,false)
+    {
+        assert(initSize>0);
+    }
+
+    /// store val at the head position and advance the head
+    void
+    push(const bool val)
+    {
+        const bool wasSet(_data[_headPos]);
+        if (wasSet && (! val))
+        {
+            assert(_count>0);
+            _count--;
+        }
+        else if (! wasSet)
+        {
+            if (val) _count++;
+            if (_count > _maxCount) _maxCount = _count;
+        }
+        _data[_headPos] = val;
+        _headPos = nextPos();
+        if (_dataSize < size()) _dataSize++;
+    }
+
+    /// overwrite the most recently pushed value with val
+    void
+    replace(const bool val)
+    {
+        assert(_dataSize>0);
+        _dataSize--;
+        _headPos=lastPos();
+        push(val);
+    }
+
+    unsigned
+    count() const
+    {
+        return _count;
+    }
+
+    /// largest value that count() has reached so far
+    unsigned
+    maxCount() const
+    {
+        return _maxCount;
+    }
+
+    /// less than or equal to size(), according to the number
+    /// of observations pushed
+    unsigned
+    dataSize() const
+    {
+        return _dataSize;
+    }
+
+    unsigned
+    size() const
+    {
+        return _data.size();
+    }
+
+private:
+    /// position before the head, wrapping from the first slot to the last
+    unsigned
+    lastPos() const
+    {
+        return ((_headPos==0) ? (size()-1) : (_headPos-1));
+    }
+
+    /// position after the head, wrapping from the last slot to the first
+    unsigned
+    nextPos() const
+    {
+        const unsigned pos(_headPos+1);
+        return ((pos<size()) ? pos : 0);
+    }
+
+    unsigned _count; ///< number of true values in _data
+    unsigned _headPos; ///< index in _data written by the next push
+    unsigned _dataSize; ///< number of observations stored so far, capped at size()
+    unsigned _maxCount; ///< largest value reached by _count
+    std::vector<bool> _data;
+};
diff --git a/src/c++/lib/blt_util/LinearScaler.hh b/src/c++/lib/blt_util/LinearScaler.hh
new file mode 100644
index 0000000..610ec6b
--- /dev/null
+++ b/src/c++/lib/blt_util/LinearScaler.hh
@@ -0,0 +1,71 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cassert>
+
+#include <algorithm>
+
+
+/// Map a value onto [0,1] according to where it lies between min and max values
+/// (values at or below min map to 0, values at or above max map to 1)
+template <typename T>
+struct LinearScaler
+{
+    LinearScaler() :
+        _min(static_cast<T>(0)),
+        _factor(1.)
+    {}
+
+    LinearScaler(
+        const T min,
+        const T max)
+    {
+        init(min, max);
+    }
+    /// (re)set the scaled range, max must be greater than min
+    void
+    init(
+        const T min,
+        const T max)
+    {
+        assert(max>min);
+        _min = min;
+        // convert before subtracting so that (max-min) cannot overflow T:
+        _factor = (1./(static_cast<double>(max)-static_cast<double>(min)));
+    }
+    /// \return position of val within the scaled range, clamped to [0,1]
+    double
+    getScale(
+        const T val) const
+    {
+        // convert before subtracting, for unsigned T (val-_min) wraps when val<_min:
+        return std::min(1., std::max(0., (static_cast<double>(val)-static_cast<double>(_min))*_factor));
+    }
+
+private:
+    T _min;
+    double _factor;
+};
diff --git a/src/c++/lib/blt_util/MedianDepthTracker.hh b/src/c++/lib/blt_util/MedianDepthTracker.hh
new file mode 100644
index 0000000..a034ef6
--- /dev/null
+++ b/src/c++/lib/blt_util/MedianDepthTracker.hh
@@ -0,0 +1,93 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cassert>
+
+#include <map>
+
+
+/// online median tracking obj assuming high repeat obs counts
+///
+/// Observations are stored as a count per distinct depth value.
+///
+/// Note that by design depth=0 is excluded from the median
+struct MedianDepthTracker
+{
+    /// register one observation of depth val
+    void
+    addObs(const unsigned val)
+    {
+        // operator[] value-initializes the count to 0 the first time
+        // a depth is seen:
+        _cmap[val]++;
+        _total++;
+    }
+
+    /// \return median of all non-zero observations, or 0 if there are none
+    double
+    getMedian() const
+    {
+        // number of non-zero observations, +1 so that the doubled running
+        // count below crosses this threshold exactly at the median:
+        const auto zeroIter(_cmap.find(0));
+        const unsigned zeroCount((zeroIter == _cmap.end()) ? 0 : zeroIter->second);
+        const unsigned threshold((_total+1)-zeroCount);
+
+        unsigned doubledCount(0);
+        unsigned lower(0);
+        unsigned upper(0);
+        for (auto iter(_cmap.upper_bound(0)); iter != _cmap.end(); ++iter)
+        {
+            const unsigned depth(iter->first);
+            const unsigned doubledObs(iter->second*2);
+
+            // counts are doubled (not halved) so that we stay away from float math:
+            doubledCount += doubledObs;
+            if (doubledCount < threshold)
+            {
+                lower = depth;
+                continue;
+            }
+
+            // the median falls on this depth, unless it is split evenly
+            // between this depth and the previous one:
+            upper = depth;
+            if ((threshold + doubledObs) != (doubledCount + 1)) lower = depth;
+            break;
+        }
+
+        assert ((doubledCount+1) >= threshold);
+
+        if (lower == upper) return lower;
+        return (static_cast<double>(lower + upper)/2.);
+    }
+
+private:
+    /// total number of observations, including depth=0
+    unsigned _total = 0;
+    /// number of observations of each depth
+    std::map<unsigned,unsigned> _cmap;
+};
diff --git a/src/c++/lib/blt_util/PolymorphicObject.hh b/src/c++/lib/blt_util/PolymorphicObject.hh
new file mode 100644
index 0000000..5c84e39
--- /dev/null
+++ b/src/c++/lib/blt_util/PolymorphicObject.hh
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+
+/// convenience base class for polymorphic objects
+///
+/// This class helps get around some of the boilerplate imposed by
+/// c++ spec implicit copy ctor rules for virtual classes. Use
+/// this as a base class for any standard virtual object with liberal
+/// default copy/move semantics.
+///
+/// Per suggestion from: http://stackoverflow.com/questions/19997646/no-implicit-copy-constructor-in-polymorphic-class
+///
+struct PolymorphicObject
+{
+    PolymorphicObject() = default;
+    virtual ~PolymorphicObject() = default;
+    // support copying, both operations are explicitly defaulted (note the copy ctor is marked explicit)
+    explicit
+    PolymorphicObject(const PolymorphicObject&) = default;
+    PolymorphicObject& operator =(const PolymorphicObject&) = default;
+
+#if ((!defined(_MSC_VER)) || (_MSC_VER > 1800))
+    // support moving (compiled out for MSVC when _MSC_VER <= 1800)
+    explicit
+    PolymorphicObject(PolymorphicObject&&) = default;
+    PolymorphicObject& operator=(PolymorphicObject&&) = default;
+#endif
+};
diff --git a/src/c++/lib/blt_util/RangeMap.hh b/src/c++/lib/blt_util/RangeMap.hh
new file mode 100644
index 0000000..ea5e596
--- /dev/null
+++ b/src/c++/lib/blt_util/RangeMap.hh
@@ -0,0 +1,343 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/blt_exception.hh"
+
+#include "boost/dynamic_bitset.hpp"
+
+#include <algorithm>
+#include <sstream>
+#include <vector>
+
+//#define DEBUG_RMAP
+
+#ifdef DEBUG_RMAP
+#include <iostream>
+#endif
+
+
+/// ValClear option for RangeMap (one of two predefined here): resets a value by assigning 0 to it
+template <typename T>
+struct ZeroT
+{
+    void
+    operator()(T& val) const
+    {
+        val = 0;
+    }
+};
+/// ValClear option for RangeMap: resets a value by calling its clear() method
+template <typename T>
+struct ClearT
+{
+    void
+    operator()(T& val) const
+    {
+        val.clear();
+    }
+};
+
+
+/// provides map-like storage for a set of positions which are assumed to
+/// cluster in a small range
+///
+/// in practice this is very similar to an unbounded circular buffer,
+/// but heavily customized for our specific application
+/// (storage is a vector of values used as a ring, plus a bitset flagging the occupied slots)
+template <typename KeyType, typename ValType, typename ValClear = ZeroT<ValType>>
+struct RangeMap
+{
+    /// \TODO automate this w/ static assert/concepts:
+    // KeyType must implement operator < and +/-
+
+    static const unsigned defaultMinChunk = 1024;
+
+    /// \param minChunk the storage buffer operates in units of minChunk, this
+    ///                 setting could impact performance in some specialized
+    ///                 cases but in general shouldn't need to be set
+    explicit
+    RangeMap(const unsigned minChunk = defaultMinChunk) :
+        _minChunk(minChunk),
+        _isEmpty(true),
+        _minKeyIndex(0),
+        _data(_minChunk),
+        _occup(_minChunk)
+    {}
+    /// remove all keys (a stored value is only re-initialized when getRef next claims its slot)
+    void
+    clear()
+    {
+        _isEmpty=true;
+        _minKeyIndex=0;
+        _occup.reset();
+    }
+
+    bool
+    empty() const
+    {
+        return _isEmpty;
+    }
+    /// \return true if key k is currently stored
+    bool
+    isKeyPresent(
+        const KeyType& k) const
+    {
+        return (! ((_isEmpty) || (k < _minKey) || (k > _maxKey) || (! _occup.test(getKeyIndex(k)))));
+    }
+
+    /// get a mutable reference for the value associated with key k
+    ///
+    /// if k does not exist then it is initialized according to ValClear type parameter
+    ValType&
+    getRef(
+        const KeyType& k)
+    {
+        if (_isEmpty)
+        {
+            _minKey=k;
+        }
+        else if (k < _minKey)
+        {
+            expand(_maxKey-k+1);
+            const unsigned dataSize(_data.size());
+            _minKeyIndex = ((_minKeyIndex + dataSize)-(_minKey-k) ) % dataSize;
+            _minKey = k;
+        }
+
+        if ((_isEmpty) || (k > _maxKey))
+        {
+            expand(k-_minKey+1);
+            _maxKey = k;
+            _isEmpty=false;
+        }
+
+
+        const unsigned kindex(getKeyIndex(k));
+        if (! _occup.test(kindex))
+        {
+            _clearFunc(_data[kindex]);
+            _occup.set(kindex);
+        }
+
+        return _data[kindex];
+    }
+
+    /// get a const reference to key's associated value
+    ///
+    /// blt_exception is thrown if key is absent
+    const ValType&
+    getConstRef(
+        const KeyType& k) const
+    {
+        enforceKeyPresent(k);
+        return _data[getKeyIndex(k)];
+    }
+
+    /// get a const reference to key's associated value, or provide reference to specified default
+    const ValType&
+    getConstRefDefault(
+        const KeyType& k,
+        const ValType& defaultVal) const
+    {
+        if (! isKeyPresent(k)) return defaultVal;
+        return _data[getKeyIndex(k)];
+    }
+    /// erase key k, blt_exception is thrown if key is absent
+    void
+    erase(
+        const KeyType& k)
+    {
+        enforceKeyPresent(k);
+        const unsigned kindex(getKeyIndex(k));
+        _occup.reset(kindex);
+
+        if (k != _minKey) return;
+
+        resetMinKey();
+    }
+
+    /// erase all contents with keys sorting less than or equal to k
+    void
+    eraseTo(
+        const KeyType& k)
+    {
+        // special cases:
+        if (_isEmpty) return;
+        if (_minKey > k) return;
+        if (_maxKey <= k)
+        {
+            clear();
+            return;
+        }
+
+        if (_minKey == k)
+        {
+            /// accelerate/simplify _occup setting for common use case of erasing
+            /// a single position off the end of contiguous key block
+            _occup.reset(_minKeyIndex);
+        }
+        else
+        {
+            boost::dynamic_bitset<>& mask(_occup_mask_helper);
+            mask.resize(1+k-_minKey);
+            mask.reset();
+            mask.resize(_occup.size(),true);
+            rotateLeft(mask,_minKeyIndex);
+            _occup &= mask;
+        }
+
+        resetMinKey();
+    }
+
+#ifdef DEBUG_RMAP
+    /// debug dumper:
+    void
+    dump(const char* msg, std::ostream& os) const
+    {
+        os << "rangeMap dump: " << msg << "\n"
+           << "\tempty: " << _isEmpty << "\n"
+           << "\tminKeyIndex: " << _minKeyIndex << "\n"
+           << "\tminKey: " << _minKey << "\n"
+           << "\tmaxKey: " << _maxKey << "\n"
+           << "\tdatasize: " << _data.size() << "\n";
+    }
+#endif
+
+private:
+    /// update minKey based on new occupy values:
+    void
+    resetMinKey()
+    {
+        // we have to shift minKey up to the next valid value:
+        const unsigned keySize(_maxKey-_minKey);
+        for (unsigned offset(1); offset<=keySize; ++offset)
+        {
+            const unsigned testIndex(getKeyIndexOffset(offset));
+            if (! _occup.test(testIndex)) continue;
+            _minKeyIndex = testIndex;
+            _minKey += offset;
+            return;
+        }
+
+        _isEmpty=true;
+    }
+
+    /// assumes offset has already been validated!
+    unsigned
+    getKeyIndexOffset(
+        const unsigned offset) const
+    {
+        // the following should be faster than a modulus but
+        // still handle all cases:
+        const unsigned i(_minKeyIndex + offset);
+        const unsigned d(_data.size());
+        if (i < d) return i;
+        return (i-d);
+    }
+
+    /// assumes key has already been validated!
+    unsigned
+    getKeyIndex(
+        const KeyType& k) const
+    {
+        return getKeyIndexOffset(k-_minKey);
+    }
+
+    /// rotate data so that minKeyIndex is 0
+    void
+    normRotate()
+    {
+        if (_minKeyIndex==0) return;
+        std::rotate(_data.begin(),_data.begin()+_minKeyIndex,_data.end());
+        rotateRight(_occup,_minKeyIndex);
+        _minKeyIndex=0;
+    }
+
+    // expand to larger of 2x current size or minSize+minChunk:
+    void
+    expand(
+        const unsigned minSize)
+    {
+        if (minSize <= _data.size()) return;
+        const unsigned newSize(std::max(static_cast<unsigned>(2*_data.size()),minSize+_minChunk));
+        normRotate();
+        _data.resize(newSize);
+        _occup.resize(newSize);
+    }
+    /// throw blt_exception unless key k is present
+    void
+    enforceKeyPresent(
+        const KeyType& k) const
+    {
+        if (isKeyPresent(k)) return;
+        std::ostringstream oss;
+        oss << "Attempting to retrieve an invalid key '" << k << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+    /// rotate the bits of a by n positions towards higher bit indices
+    void
+    rotateLeft(
+        boost::dynamic_bitset<>& a,
+        unsigned n)
+    {
+        if (n==0) return;
+        const unsigned s(a.size());
+        assert(n<s);
+        boost::dynamic_bitset<>& b(_occup_rotate_helper);
+        b = a;
+        b >>= (s-n);
+        a <<= n;
+        a |= b;
+    }
+    /// rotate the bits of a by n positions towards lower bit indices
+    void
+    rotateRight(
+        boost::dynamic_bitset<>& a,
+        unsigned n)
+    {
+        if (n==0) return;
+        const unsigned s(a.size());
+        assert(n<s);
+        rotateLeft(a, (s-n));
+    }
+
+    const unsigned _minChunk;
+
+    bool _isEmpty;
+    unsigned _minKeyIndex;
+    KeyType _minKey;
+    KeyType _maxKey;
+    std::vector<ValType> _data;
+    boost::dynamic_bitset<> _occup;
+
+    /// used to cache the copy needed for masking:
+    boost::dynamic_bitset<> _occup_mask_helper;
+
+    /// used to cache the copy needed for rotate:
+    boost::dynamic_bitset<> _occup_rotate_helper;
+
+    ValClear _clearFunc;
+};
diff --git a/src/c++/lib/blt_util/ReadKey.cpp b/src/c++/lib/blt_util/ReadKey.cpp
new file mode 100644
index 0000000..d819569
--- /dev/null
+++ b/src/c++/lib/blt_util/ReadKey.cpp
@@ -0,0 +1,39 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/ReadKey.hh"
+
+#include <iostream>
+
+
+
+/// write the read key to os as qname, a '/' separator, then the read number
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const ReadKey& rk)
+{
+    return (os << rk.qname() << '/' << rk.readNo());
+}
+
diff --git a/src/c++/lib/blt_util/ReadKey.hh b/src/c++/lib/blt_util/ReadKey.hh
new file mode 100644
index 0000000..73772fb
--- /dev/null
+++ b/src/c++/lib/blt_util/ReadKey.hh
@@ -0,0 +1,120 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/compat_util.hh"
+#include "htsapi/bam_record.hh"
+
+#include "boost/utility.hpp"
+
+#include <cstdlib>
+#include <cstring>
+
+#include <iosfwd>
+#include <string>
+
+
+/// information required to uniquely identify a read:
+/// (the qname string is strdup'd and owned by the key when isCopyPtrs is true, otherwise only the pointer is stored)
+struct ReadKey
+{
+    ReadKey(
+        const bam_record& br,
+        const bool isCopyPtrs = true)
+        : _isCopyPtrs(isCopyPtrs)
+        , _qname((_isCopyPtrs && (NULL != br.qname())) ? strdup(br.qname()) : br.qname())
+        , _readNo(br.read_no())
+    {
+        assert(NULL != _qname);
+    }
+
+    ReadKey(
+        const char* initQname,
+        const int initReadNo,
+        const bool isCopyPtrs = true)
+        : _isCopyPtrs(isCopyPtrs)
+        , _qname((_isCopyPtrs && (NULL != initQname)) ? strdup(initQname) : initQname)
+        , _readNo(initReadNo)
+    {
+        assert(NULL != _qname);
+    }
+    /// copy ctor: the qname string is duplicated again when rhs holds its own copy
+    ReadKey(
+        const ReadKey& rhs)
+        : _isCopyPtrs(rhs._isCopyPtrs)
+        , _qname(_isCopyPtrs ? strdup(rhs._qname) : rhs._qname)
+        , _readNo(rhs._readNo)
+    {}
+    // copy assignment is declared private and is not defined in this header:
+private:
+    ReadKey& operator=(const ReadKey& rhs);
+
+public:
+    ~ReadKey()
+    {
+        if (_isCopyPtrs)
+        {
+            if (NULL != _qname) free(const_cast<char*>(_qname));
+        }
+    }
+
+    int
+    readNo() const
+    {
+        return _readNo;
+    }
+
+    const char*
+    qname() const
+    {
+        return _qname;
+    }
+    /// sort by read number first, then by strcmp order of qname
+    bool operator<(
+        const ReadKey& rhs) const
+    {
+        if (readNo() < rhs.readNo()) return true;
+        if (readNo() == rhs.readNo())
+        {
+            return (strcmp(qname(), rhs.qname()) < 0);
+        }
+        return false;
+    }
+
+    bool operator==(
+        const ReadKey& rhs) const
+    {
+        return ((readNo() == rhs.readNo()) && ((0 == strcmp(qname(), rhs.qname()))));
+    }
+
+private:
+    const bool _isCopyPtrs; ///< if true, _qname is a strdup'd copy which is freed by the dtor
+    const char* _qname;
+    const int _readNo;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const ReadKey& rk);
+
diff --git a/src/c++/lib/blt_util/RegionSum.hh b/src/c++/lib/blt_util/RegionSum.hh
new file mode 100644
index 0000000..14f50e6
--- /dev/null
+++ b/src/c++/lib/blt_util/RegionSum.hh
@@ -0,0 +1,72 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_types.hh"
+#include "known_pos_range2.hh"
+
+#include "boost/icl/interval_map.hpp"
+
+
+/// accumulates region specific sum(T) given a set of (region,T) associations
+///
+template <typename T>
+struct RegionSum
+{
+    /// drop all accumulated regions
+    void
+    clear()
+    {
+        _map.clear();
+    }
+
+    /// add val over the right-open interval from pr.begin_pos() to pr.end_pos()
+    void
+    add(
+        const known_pos_range2 pr,
+        const T val)
+    {
+        const auto interval(boost::icl::interval<pos_t>::right_open(pr.begin_pos(),pr.end_pos()));
+        _map.add(std::make_pair(interval,val));
+    }
+
+    /// return peak value for all regions (or T(0) if no regions are stored):
+    T
+    maxVal() const
+    {
+        auto iter(_map.begin());
+        if (iter == _map.end()) return T(0);
+        T peak(iter->second);
+        for (++iter; iter != _map.end(); ++iter)
+        {
+            if (iter->second > peak) peak = iter->second;
+        }
+        return peak;
+    }
+
+private:
+    using map_t = boost::icl::interval_map<pos_t,T>;
+    map_t _map;
+};
diff --git a/src/c++/lib/blt_util/RegionTracker.cpp b/src/c++/lib/blt_util/RegionTracker.cpp
new file mode 100644
index 0000000..54e3ed4
--- /dev/null
+++ b/src/c++/lib/blt_util/RegionTracker.cpp
@@ -0,0 +1,118 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/RegionTracker.hh"
+
+
+
+bool
+RegionTracker::
+isIntersectRegionImpl(
+    const pos_t beginPos,
+    const pos_t endPos) const
+{
+    // regions are sorted by end position, so this lookup yields the first
+    // region ending after the start of the query (if any):
+    const known_pos_range2 queryStart(beginPos,beginPos);
+    const auto candidate(_regions.upper_bound(queryStart));
+
+    // tracked regions do not overlap, so only this candidate can intersect
+    // the query, and it does so iff it starts before the query ends:
+    return ((candidate != _regions.end()) && (candidate->begin_pos() < endPos));
+}
+
+
+bool
+RegionTracker::
+isSubsetOfRegionImpl(
+    const pos_t beginPos,
+    const pos_t endPos) const
+{
+    // first region ending after the start of the query (if any), this is the
+    // only region which could contain the query because regions do not overlap:
+    const known_pos_range2 queryStart(beginPos,beginPos);
+    const auto candidate(_regions.upper_bound(queryStart));
+    if (candidate == _regions.end()) return false;
+    // the query is a subset iff the candidate spans both ends of the query:
+    const bool isBeginCovered(candidate->begin_pos() <= beginPos);
+    const bool isEndCovered(! (candidate->end_pos() < endPos));
+    return (isBeginCovered && isEndCovered);
+}
+
+
+
+void
+RegionTracker::
+addRegion(
+    known_pos_range2 range)
+{
+    // grow range over any existing regions which it overlaps or abuts, then
+    // replace those regions with the single merged range:
+    const pos_t preBeginPos(range.begin_pos()-1);
+    const auto mergeBegin(_regions.upper_bound(known_pos_range2(preBeginPos,preBeginPos)));
+    if ((mergeBegin != _regions.end()) && (mergeBegin->begin_pos() <= preBeginPos))
+    {
+        range.set_begin_pos(mergeBegin->begin_pos());
+    }
+
+    const pos_t endPos(range.end_pos());
+    auto mergeEnd(_regions.upper_bound(known_pos_range2(endPos,endPos)));
+    if ((mergeEnd != _regions.end()) && (mergeEnd->begin_pos() <= endPos))
+    {
+        range.set_end_pos(mergeEnd->end_pos());
+        ++mergeEnd;
+    }
+    _regions.erase(mergeBegin,mergeEnd);
+    _regions.insert(range);
+}
+
+
+
+void
+RegionTracker::
+removeToPos(
+    const pos_t pos)
+{
+    // regions are sorted by end position, so scan from the front:
+    auto keepIter(_regions.begin());
+    for (; keepIter != _regions.end(); ++keepIter)
+    {
+        if (! (keepIter->end_pos() <= (pos+1))) break;
+    }
+    _regions.erase(_regions.begin(),keepIter);
+}
+
+
+
+void
+RegionTracker::
+dump(
+    std::ostream& os) const
+{
+    os << "RegionTracker\n"; // header line, followed by one line per tracked region
+    for (auto iter(_regions.begin()); iter != _regions.end(); ++iter)
+    {
+        os << "region: " << *iter << "\n";
+    }
+}
diff --git a/src/c++/lib/blt_util/RegionTracker.hh b/src/c++/lib/blt_util/RegionTracker.hh
new file mode 100644
index 0000000..ce678ca
--- /dev/null
+++ b/src/c++/lib/blt_util/RegionTracker.hh
@@ -0,0 +1,222 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/known_pos_range2.hh"
+
+#include "boost/optional.hpp"
+
+#include <iosfwd>
+#include <map>
+#include <set>
+
+
+/// order pos ranges using end_pos as the primary sort key
+/// and begin_pos as the secondary sort key
+struct PosRangeEndSort
+{
+    bool
+    operator()(
+        const known_pos_range2& lhs,
+        const known_pos_range2& rhs) const
+    {
+        // ranges ending at different positions are ordered by end position:
+        if (! (lhs.end_pos() == rhs.end_pos())) return (lhs.end_pos() < rhs.end_pos());
+
+        // ties on end position are broken by begin position:
+        return (lhs.begin_pos() < rhs.begin_pos());
+    }
+};
+
+
+/// facilitate 'rolling' region tracking and position intersect queries
+/// (regions are stored as a set of non-overlapping ranges, sorted by end position)
+struct RegionTracker
+{
+    bool
+    empty() const
+    {
+        return _regions.empty();
+    }
+    /// remove all tracked regions
+    void
+    clear()
+    {
+        _regions.clear();
+    }
+
+    /// is single position in a tracked region?
+    bool
+    isIntersectRegion(
+        const pos_t pos) const
+    {
+        return isIntersectRegionImpl(pos,pos+1);
+    }
+
+    /// does range intersect any tracked region?
+    bool
+    isIntersectRegion(
+        const known_pos_range2 range) const
+    {
+        return isIntersectRegionImpl(range.begin_pos(),range.end_pos());
+    }
+
+    /// is range entirely contained in a region?
+    bool
+    isSubsetOfRegion(
+        const known_pos_range2 range) const
+    {
+        return isSubsetOfRegionImpl(range.begin_pos(),range.end_pos());
+    }
+
+    /// add region
+    ///
+    /// any overlaps and adjacencies with existing regions in the tracker will be collapsed
+    void
+    addRegion(
+        known_pos_range2 range);
+
+    /// remove all regions with end_pos() <= pos+1
+    void
+    removeToPos(
+        const pos_t pos);
+
+    /// debug util: write one line per tracked region to os
+    void
+    dump(
+        std::ostream& os) const;
+    /// \return number of regions currently stored
+    unsigned
+    size() const
+    {
+        return _regions.size();
+    }
+    /// region storage type, sorted with PosRangeEndSort
+    typedef std::set<known_pos_range2,PosRangeEndSort>  region_t;
+
+private:
+    /// test whether the range [beginPos,endPos) intersects any tracked region
+    bool
+    isIntersectRegionImpl(
+        const pos_t beginPos,
+        const pos_t endPos) const;
+    /// test whether the range [beginPos,endPos) lies within a single tracked region
+    bool
+    isSubsetOfRegionImpl(
+        const pos_t beginPos,
+        const pos_t endPos) const;
+
+    region_t _regions;
+};
+
+
+/// facilitate 'rolling' region tracking and position intersect queries
+///
+/// this version of RegionTracker carries a payload associated with each region
+/// (queries return the payload as a boost::optional, left unset when no region matches)
+template <typename T>
+struct RegionPayloadTracker
+{
+    bool
+    empty() const
+    {
+        return _regions.empty();
+    }
+
+    /// is single position in a tracked region w/ payload?
+    boost::optional<T>
+    isIntersectRegion(
+        const pos_t pos) const
+    {
+        return isIntersectRegionImpl(pos,pos+1);
+    }
+
+    // commenting out pending definition of expected behavior when
+    // the query range intersects more than one tracked range, what
+    // is the payload returned in such a case?
+#if 0
+    /// does range intersect any tracked region w/ payload?
+    boost::optional<T>
+    isIntersectRegion(
+        const known_pos_range2 range) const
+    {
+        return isIntersectRegionImpl(range.begin_pos(),range.end_pos());
+    }
+#endif
+
+    /// is range entirely contained in a tracked region w/ payload?
+    boost::optional<T>
+    isSubsetOfRegion(
+        const known_pos_range2 range) const
+    {
+        return isSubsetOfRegionImpl(range.begin_pos(),range.end_pos());
+    }
+
+    /// add region
+    ///
+    /// any non-conflicting overlaps and adjacencies with existing regions in the tracker will be collapsed
+    ///
+    /// \returns false when there is an overlapping payload conflict. in this case the region is not inserted
+    bool
+    addRegion(
+        known_pos_range2 range,
+        const T payload);
+
+    /// remove all regions which end (inclusive) before pos+1
+    void
+    removeToPos(
+        const pos_t pos);
+
+    // debug util
+    void
+    dump(
+        std::ostream& os) const;
+    /// region storage type: payload keyed by range, sorted with PosRangeEndSort
+    typedef typename std::map<known_pos_range2,T,PosRangeEndSort> region_t;
+    /// \return number of regions currently stored
+    unsigned
+    size() const
+    {
+        return _regions.size();
+    }
+
+private:
+    /// \return payload of a tracked region intersecting the range [beginPos,endPos), if any
+    boost::optional<T>
+    isIntersectRegionImpl(
+        const pos_t beginPos,
+        const pos_t endPos) const;
+    /// \return payload of the tracked region containing the range [beginPos,endPos), if any
+    boost::optional<T>
+    isSubsetOfRegionImpl(
+        const pos_t beginPos,
+        const pos_t endPos) const;
+
+    region_t _regions;
+};
+
+
+#include "RegionTrackerImpl.hh"
+
diff --git a/src/c++/lib/blt_util/RegionTrackerImpl.hh b/src/c++/lib/blt_util/RegionTrackerImpl.hh
new file mode 100644
index 0000000..f2fed41
--- /dev/null
+++ b/src/c++/lib/blt_util/RegionTrackerImpl.hh
@@ -0,0 +1,173 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include <iostream>
+
+
+
+/// \returns payload of the tracked region intersecting [beginPos,endPos), or an empty optional if there is none
+template <typename T>
+boost::optional<T>
+RegionPayloadTracker<T>::
+isIntersectRegionImpl(
+    const pos_t beginPos,
+    const pos_t endPos) const
+{
+    // the lookup key is a zero-length range at beginPos: upper_bound then yields
+    // the first region where region.endPos > query.beginPos
+    const auto hit(_regions.upper_bound(known_pos_range2(beginPos,beginPos)));
+
+    // no such region (this also covers an empty tracker):
+    if (hit == _regions.end()) return boost::optional<T>();
+
+    // tracked regions do not overlap, so only this candidate can intersect the query:
+    if (hit->first.begin_pos() >= endPos) return boost::optional<T>();
+
+    return boost::optional<T>(hit->second);
+}
+
+
+
+/// \returns payload of the tracked region which fully contains [beginPos,endPos), or an empty optional
+template <typename T>
+boost::optional<T>
+RegionPayloadTracker<T>::
+isSubsetOfRegionImpl(
+    const pos_t beginPos,
+    const pos_t endPos) const
+{
+    const boost::optional<T> noMatch;
+
+    // candidate is the first region where region.endPos > query.beginPos
+    const auto hit(_regions.upper_bound(known_pos_range2(beginPos,beginPos)));
+    if (hit == _regions.end()) return noMatch;
+
+    // tracked regions do not overlap, so the query is contained in a region
+    // only if this single candidate spans both ends of the query:
+    const known_pos_range2& candidate(hit->first);
+    const bool isContained(
+        (candidate.end_pos() >= endPos) &&
+        (candidate.begin_pos() <= beginPos));
+
+    if (! isContained) return noMatch;
+    return boost::optional<T>(hit->second);
+}
+
+
+
+template <typename T>
+bool
+RegionPayloadTracker<T>::
+addRegion(
+    known_pos_range2 range,
+    const T payload)
+{
+    // insert range with payload, first widening it to absorb the stored regions it overlaps or touches
+    // ('range' is a by-value copy, so the widening below stays local to this call).
+    // startOlap: first stored region with end_pos > range.begin_pos-1, i.e. overlapping or left-adjacent
+    auto startOlap(_regions.upper_bound(known_pos_range2(range.begin_pos()-1,range.begin_pos()-1)));
+    while (startOlap != _regions.end())
+    {
+        // a left-adjacent region with a different payload is left untouched and skipped:
+        if (startOlap->first.end_pos() == range.begin_pos())
+        {
+            if (startOlap->second != payload)
+            {
+                startOlap++;
+                continue;
+            }
+        }
+        if (startOlap->first.begin_pos() <= (range.begin_pos()-1))
+        {
+            // stored region starts before range, so extend range leftward to cover it:
+            range.set_begin_pos(startOlap->first.begin_pos());
+        }
+        break;
+    }
+    // endOlap: first stored region with end_pos > range.end_pos
+    auto endOlap(_regions.upper_bound(known_pos_range2(range.end_pos(),range.end_pos())));
+    if (endOlap != _regions.end())
+    {
+        // a right-adjacent region merges only when payloads match, a region overlapping the end always merges:
+        bool isMerge(false);
+        if (endOlap->first.begin_pos() == range.end_pos())
+        {
+            isMerge=(endOlap->second == payload);
+        }
+        else if (endOlap->first.begin_pos() < range.end_pos())
+        {
+            isMerge=true;
+        }
+
+        if (isMerge)
+        {
+            // extend range rightward, then step endOlap past the merged region so it falls inside [startOlap,endOlap):
+            range.set_end_pos(endOlap->first.end_pos());
+            endOlap++;
+        }
+    }
+
+    // every region in [startOlap,endOlap) is about to be replaced, refuse the insert if any holds another payload:
+    for (auto iter(startOlap); iter != endOlap; ++iter)
+    {
+        if (iter->second != payload) return false;
+    }
+    // swap the absorbed regions for the single widened one:
+    _regions.erase(startOlap,endOlap);
+    _regions.insert(std::make_pair(range,payload));
+    return true;
+}
+
+
+
+/// drop every leading region which ends at or before pos+1
+template <typename T>
+void
+RegionPayloadTracker<T>::
+removeToPos(
+    const pos_t pos)
+{
+    // walk from the front of the map until the first region extending past pos+1:
+    auto keepIter(_regions.begin());
+    for (; keepIter != _regions.end(); ++keepIter)
+    {
+        if (keepIter->first.end_pos() > (pos+1)) break;
+    }
+    _regions.erase(_regions.begin(),keepIter);
+}
+
+
+
+/// debug util: write a header line, then one line per tracked region and its payload, to os
+template <typename T>
+void
+RegionPayloadTracker<T>::
+dump(std::ostream& os) const
+{
+    os << "RegionPayloadTracker\n";
+    for (auto regionIter(_regions.begin()); regionIter != _regions.end(); ++regionIter)
+    {
+        os << "region: " << regionIter->first << " value: " << regionIter->second << "\n";
+    }
+}
+
diff --git a/src/c++/lib/blt_util/SampleVector.hh b/src/c++/lib/blt_util/SampleVector.hh
new file mode 100644
index 0000000..cff7d7a
--- /dev/null
+++ b/src/c++/lib/blt_util/SampleVector.hh
@@ -0,0 +1,65 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <vector>
+
+
+/// random sub-sampling array
+///
+/// This is an array with size fixed to S (initSize) at instantiation time. The
+/// array accepts N (N>=S) objects as input. For any N>=S, the array
+/// contains a given input object with probability S/N.
+///
+/// This behavior is implemented via standard reservoir sampling. Slots not yet
+/// filled by an input (N<S) hold a value-initialized T.
+template <typename T, typename RNG>
+struct SampleVector
+{
+    /// \param initRng c++11 <random> rng generator, see std::shuffle for detailed doc of similar parameter
+    SampleVector(
+        const unsigned initSize,
+        RNG& initRng)
+        : _inputCount(0),
+          _data(initSize), // value-initialize: T no longer has to be constructible from a literal 0
+          _rng(initRng)
+    {}
+
+    void
+    push(const T& val); ///< offer one more input object to the reservoir
+
+    const std::vector<T>&
+    data() const
+    {
+        return _data;
+    }
+
+private:
+    unsigned _inputCount; ///< number of objects pushed so far
+    std::vector<T> _data; ///< the reservoir, always initSize elements long
+    RNG& _rng; ///< generator supplied by the caller, held by reference
+};
+
+#include "SampleVectorImpl.hh"
diff --git a/src/c++/lib/blt_util/SampleVectorImpl.hh b/src/c++/lib/blt_util/SampleVectorImpl.hh
new file mode 100644
index 0000000..2d65ca2
--- /dev/null
+++ b/src/c++/lib/blt_util/SampleVectorImpl.hh
@@ -0,0 +1,51 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include <random>
+
+
+
+/// reservoir sampling update: offer val as the next input object
+template <typename T, typename RNG>
+void
+SampleVector<T,RNG>::
+push(const T& val)
+{
+    const unsigned reservoirSize(_data.size());
+
+    // while the reservoir is still filling, inputs land in arrival order and no rng draw is made:
+    unsigned slot(_inputCount);
+    if (_inputCount >= reservoirSize)
+    {
+        // afterwards draw a slot uniformly from [0,_inputCount], the chance that
+        // it lands inside the reservoir shrinks as more inputs arrive:
+        std::uniform_int_distribution<unsigned> slotDist(0,_inputCount);
+        slot = slotDist(_rng);
+    }
+
+    // a draw beyond the reservoir means val is discarded:
+    if (slot < reservoirSize) _data[slot] = val;
+
+    _inputCount++;
+}
diff --git a/src/c++/lib/blt_util/SimpleAlignment.cpp b/src/c++/lib/blt_util/SimpleAlignment.cpp
new file mode 100644
index 0000000..2002808
--- /dev/null
+++ b/src/c++/lib/blt_util/SimpleAlignment.cpp
@@ -0,0 +1,139 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+///
+///
+
+#include "SimpleAlignment.hh"
+
+#include <cassert>
+
+#include <iostream>
+
+
+
+/// debug print of an alignment: tid, position, strand sign and path
+std::ostream&
+operator<<(std::ostream& os, const SimpleAlignment& sa)
+{
+    const char strandCode(sa.is_fwd_strand ? '+' : '-');
+    os << "alignment: " << sa.tid << ':' << sa.pos << strandCode << ' ' << sa.path;
+    return os;
+}
+
+
+
+/// convert edge segments of type segmentType to MATCH, where an edge segment is one which exists
+/// before or after all match segments currently in the alignment (located via get_match_edge_segments)
+///
+SimpleAlignment
+matchifyEdgeSegmentType(
+    const SimpleAlignment& al,
+    const ALIGNPATH::align_t segmentType,
+    const bool isMatchLeadingEdge,
+    const bool isMatchTrailingEdge)
+{
+    using namespace ALIGNPATH;
+
+    assert(is_segment_type_read_length(segmentType));
+
+    SimpleAlignment result;
+    result.is_fwd_strand = al.is_fwd_strand;
+    result.tid = al.tid;
+    result.pos = al.pos;
+
+    const std::pair<unsigned,unsigned> matchEdges(get_match_edge_segments(al.path));
+    unsigned segmentIndex(0);
+    for (const path_segment& segment : al.path)
+    {
+        const bool isLeading(segmentIndex < matchEdges.first);
+        const bool isTrailing(segmentIndex > matchEdges.second);
+        ++segmentIndex;
+
+        const bool isEnabledEdge((isLeading && isMatchLeadingEdge) || (isTrailing && isMatchTrailingEdge));
+        const bool isConverted(isEnabledEdge && (segment.type == segmentType));
+
+        // the alignment start shifts left by the length of each converted leading segment:
+        if (isConverted && isLeading) result.pos -= segment.length;
+
+        if (! (isConverted || (segment.type == MATCH)))
+        {
+            // every other segment is copied through unchanged:
+            result.path.push_back(segment);
+            continue;
+        }
+        // MATCH output: start a new MATCH segment unless the previous output segment can be extended
+        if (result.path.empty() || (result.path.back().type != MATCH))
+        {
+            result.path.push_back(segment);
+            result.path.back().type = MATCH;
+            continue;
+        }
+        result.path.back().length += segment.length;
+    }
+    return result;
+}
+
+
+/// get the range in reference coordinates if you did run matchifyEdgeSoftClip on an alignment:
+///
+/// note that every edge segment for which is_segment_type_read_length() holds extends the
+/// range here, the test is not restricted to SOFT_CLIP
+known_pos_range2
+matchifyEdgeSoftClipRefRange(const SimpleAlignment& al)
+{
+    using namespace ALIGNPATH;
+
+    pos_t beginPos(al.pos);
+    pos_t endPos(al.pos);
+
+    const std::pair<unsigned,unsigned> matchEdges(get_match_edge_segments(al.path));
+    unsigned segmentIndex(0);
+    for (const path_segment& segment : al.path)
+    {
+        const bool isLeading(segmentIndex < matchEdges.first);
+        const bool isTrailing(segmentIndex > matchEdges.second);
+        ++segmentIndex;
+
+        if (! (isLeading || isTrailing))
+        {
+            // interior segment: advances the range end only if it has reference length
+            if (is_segment_type_ref_length(segment.type)) endPos += segment.length;
+            continue;
+        }
+
+        // edge segment: only types with read length contribute
+        if (! is_segment_type_read_length(segment.type)) continue;
+
+        if (isLeading)
+        {
+            // leading edge: the range grows to the left of al.pos
+            beginPos -= segment.length;
+        }
+        else
+        {
+            // trailing edge: the range grows to the right
+            endPos += segment.length;
+        }
+    }
+
+    return known_pos_range2(beginPos, endPos);
+}
diff --git a/src/c++/lib/blt_util/SimpleAlignment.hh b/src/c++/lib/blt_util/SimpleAlignment.hh
new file mode 100644
index 0000000..8ed72c5
--- /dev/null
+++ b/src/c++/lib/blt_util/SimpleAlignment.hh
@@ -0,0 +1,75 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+///
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+#include "blt_util/known_pos_range2.hh"
+
+#include <iosfwd>
+
+
+/// holds minimal alignment information processed
+/// from a BAM record or other source
+struct SimpleAlignment
+{
+    bool is_fwd_strand = true; ///< streamed as '+' when true and '-' otherwise
+    int32_t tid = 0; ///< presumably the target (chromosome) index of the alignment -- TODO confirm
+    pos_t pos = 0; ///< alignment start position, matchifyEdgeSegmentType moves it left for converted leading segments
+    ALIGNPATH::path_t path; ///< alignment path segments
+};
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SimpleAlignment& sa);
+
+
+/// convert segment_type to match if the segment exists before or after all match segments currently in the alignment
+///
+SimpleAlignment
+matchifyEdgeSegmentType(
+    const SimpleAlignment& al,
+    const ALIGNPATH::align_t segmentType,
+    const bool isMatchLeadingEdge = true,
+    const bool isMatchTrailingEdge = true);
+
+
+/// transform an alignment such that any soft-clipped edge segments
+/// become match.
+///
+/// segments are joined and start pos is adjusted appropriately
+/// (leading and trailing edges are both converted, per the matchifyEdgeSegmentType defaults)
+inline
+SimpleAlignment
+matchifyEdgeSoftClip(const SimpleAlignment& al)
+{
+    return matchifyEdgeSegmentType(al, ALIGNPATH::SOFT_CLIP);
+}
+
+
+/// get the range in reference coordinates if you did run matchifyEdgeSoftClip on an alignment:
+known_pos_range2
+matchifyEdgeSoftClipRefRange(const SimpleAlignment& al);
diff --git a/src/c++/lib/blt_util/SizeDistribution.cpp b/src/c++/lib/blt_util/SizeDistribution.cpp
new file mode 100644
index 0000000..67def5d
--- /dev/null
+++ b/src/c++/lib/blt_util/SizeDistribution.cpp
@@ -0,0 +1,231 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#include "SizeDistribution.hh"
+
+#include "blt_util/log.hh"
+
+#include "boost/foreach.hpp"
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+
+//#define DEBUG_RPS
+
+
+/// set the cumulative prob (cprob) of every sizeMap entry, visiting sizes in increasing order, and fill quantiles to match
+static
+void
+populateCdfQuantiles(
+    SizeDistribution::map_type& sizeMap,
+    const unsigned totalCount,
+    std::vector<int>& quantiles)
+{
+    const unsigned quantileNum(quantiles.size());
+    const float pFactor(1/static_cast<float>(totalCount));
+    unsigned fillBase(0);
+    unsigned cumulativeCount(0);
+    BOOST_REVERSE_FOREACH(SizeDistribution::map_type::value_type& val, sizeMap)
+    {
+        cumulativeCount += (val.second.count);
+        assert(cumulativeCount <= totalCount);
+
+        // update the hash map with cumulative prob value
+        val.second.cprob = (cumulativeCount * pFactor);
+        // fill is bounded by the table size: with asserts compiled out, inconsistent counts could push cprob above 1
+        const unsigned fillNext = static_cast<unsigned>(rint(val.second.cprob * quantileNum));
+        for (; (fillBase < fillNext) && (fillBase < quantileNum); fillBase++)
+        {
+            quantiles[fillBase] = val.first;
+        }
+    }
+}
+
+
+
+
+void
+SizeDistribution::
+calcStats() const
+{
+#ifdef DEBUG_RPS
+    log_os << "Calculating stats...\n"
+           << "numOfSized=" << _sizeMap.size() << "\n";
+#endif
+    _isStatsComputed=true; // set before the empty check, so an empty distribution is not recomputed on every query
+    if (_sizeMap.empty()) return;
+    // const method: the values refreshed here (_sizeMap cprob entries, _quantiles) live in mutable members
+    populateCdfQuantiles(_sizeMap, _totalCount, _quantiles);
+}
+
+
+
+/// return size value for which we observe size value or less with prob p
+int
+SizeDistribution::
+quantile(const float prob) const
+{
+    assert((prob >= 0.) && (prob <= 1.));
+    if (! _isStatsComputed) calcStats();
+
+    // map prob onto a quantile table bin, then clamp the bin to the valid index range [0,lastBin]:
+    static const int lastBin(_quantileNum - 1);
+    const int rawBin(static_cast<int>(ceil(prob * _quantileNum) - 1));
+    const int bin((rawBin < 0) ? 0 : ((rawBin > lastBin) ? lastBin : rawBin));
+    return _quantiles[bin];
+}
+
+
+
+/// return prob of observing this size or less (0 if no observed size is <= size)
+float
+SizeDistribution::
+cdf(const int size) const
+{
+    if (! _isStatsComputed) calcStats();
+
+    // keys are sorted high->low (greater<int>), so lower_bound lands on the largest observed size which is <= size
+    const map_type::const_iterator hit(_sizeMap.lower_bound(size));
+    return ((hit == _sizeMap.end()) ? 0 : hit->second.cprob);
+}
+
+
+
+/// provide smoothed prob of observing this size (0 when the distribution holds no observations)
+float
+SizeDistribution::
+pdf(const int size) const
+{
+    if (! _isStatsComputed) calcStats();
+    if (_totalCount == 0) return 0; // without this guard the final division is 0/0 and yields NaN
+
+    static const unsigned targetSampleSize(5);
+
+    unsigned count(0);
+    int minSize(size);
+    int maxSize(size);
+
+    bool isMinBound(false);
+    bool isMaxBound(false);
+
+    /// scheme: get the five closest (in bin space) samples and sum them divided by the range required to find them
+    // map uses greater<int> for comp, so lower bound is "first element not greater than" size, from a list sorted high->low
+    map_type::const_iterator lowIter(_sizeMap.lower_bound(size));
+
+    if (lowIter == _sizeMap.end())
+    {
+        isMinBound=true; // no observed size is <= size
+    }
+
+    map_type::const_iterator highIter(lowIter);
+
+    if (highIter == _sizeMap.begin())
+    {
+        isMaxBound=true; // no observed size is > size
+    }
+    else
+    {
+        --highIter;
+    }
+
+    for (unsigned sampleIndex(0); sampleIndex<targetSampleSize; ++sampleIndex)
+    {
+        // determine whether fwd or rev pointer is closer to size:
+        if (isMinBound && isMaxBound) break;
+
+        bool isChooseLow(true);
+        if (isMinBound)
+        {
+            isChooseLow=false;
+        }
+        else if (isMaxBound)
+        {
+            isChooseLow=true;
+        }
+        else
+        {
+            isChooseLow=(std::abs(lowIter->first-size) <= std::abs(highIter->first-size));
+        }
+
+        if (isChooseLow)
+        {
+            minSize = lowIter->first;
+            count += lowIter->second.count;
+            ++lowIter;
+
+            if (lowIter == _sizeMap.end()) isMinBound=true;
+        }
+        else
+        {
+            maxSize = highIter->first;
+            count += highIter->second.count;
+            if (highIter == _sizeMap.begin())
+            {
+                isMaxBound=true;
+            }
+            else
+            {
+                --highIter;
+            }
+        }
+    }
+
+    assert(maxSize >= minSize);
+    // sampled fraction of all observations, spread evenly over the inclusive size span [minSize,maxSize]:
+    return count/(static_cast<float>(_totalCount)*static_cast<float>(1+maxSize-minSize));
+}
+
+
+
+/// filter high value outliers: drop all observations with a size greater than the quantile(prob) size
+void
+SizeDistribution::
+filterObservationsOverQuantile(const float prob)
+{
+    const int maxSize(quantile(prob));
+
+    // the map is ordered high->low, so the entries to drop form a prefix which stops
+    // at the first key <= maxSize (which is also where lower_bound lands)
+    const map_type::iterator dropBegin(_sizeMap.begin());
+    const map_type::iterator boundIter(_sizeMap.lower_bound(maxSize));
+    map_type::iterator dropEnd(dropBegin);
+    while ((dropEnd != boundIter) && (dropEnd->first > maxSize))
+    {
+        _totalCount -= dropEnd->second.count;
+        ++dropEnd;
+    }
+    _sizeMap.erase(dropBegin,dropEnd);
+
+    _isStatsComputed=false;
+}
+
+
+
+/// print the total observation count of sd followed by a newline
+std::ostream&
+operator<<(std::ostream& os, const SizeDistribution& sd)
+{
+    return (os << sd.totalObservations() << '\n');
+}
diff --git a/src/c++/lib/blt_util/SizeDistribution.hh b/src/c++/lib/blt_util/SizeDistribution.hh
new file mode 100644
index 0000000..08b90ef
--- /dev/null
+++ b/src/c++/lib/blt_util/SizeDistribution.hh
@@ -0,0 +1,167 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#pragma once
+
+#include "boost/foreach.hpp"
+#include "boost/serialization/access.hpp"
+#include "boost/serialization/nvp.hpp"
+#include "boost/serialization/split_member.hpp"
+
+#include <functional>
+#include <iosfwd>
+#include <map>
+#include <vector>
+
+
+struct SizeData
+{
+    SizeData(
+        unsigned initCount = 0,
+        float initCprob = 0.) :
+        count(initCount),
+        cprob(initCprob)
+    {}
+
+    unsigned count; ///< number of observations of one size value
+    float cprob; ///< cumulative prob of observing this size or less, assigned when the distribution stats are computed
+};
+
+
+/// this structure's only purpose is to provide neat xml output.
+/// it is not used outside of serialize/deserialize steps
+struct SizeMapXmlElement
+{
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /*version*/)
+    {
+        ar& boost::serialization::make_nvp("size", size);
+        ar& boost::serialization::make_nvp("count", count);
+    }
+
+    int size; ///< size value, the key of one SizeDistribution map entry
+    unsigned count; ///< number of observations recorded for that size
+};
+
+BOOST_CLASS_IMPLEMENTATION(SizeMapXmlElement, object_serializable)
+
+
+/// accumulate size observations and provide cdf/quantile/smoothed-pdf for the distribution
+/// (stats are computed lazily on the first query and invalidated whenever the observations change)
+struct SizeDistribution
+{
+    SizeDistribution() :
+        _isStatsComputed(false),
+        _totalCount(0),
+        _quantiles(_quantileNum,0)
+    {}
+
+    /// return size value for which we observe size value or less with prob p
+    int
+    quantile(const float prob) const;
+
+    /// return prob of observing this size or less
+    float
+    cdf(const int x) const;
+
+    /// provide smoothed prob of observing this size
+    float
+    pdf(const int x) const;
+
+    unsigned
+    totalObservations() const
+    {
+        return _totalCount; // observations currently held (reduced by filterObservationsOverQuantile)
+    }
+
+    void
+    addObservation(const int size)
+    {
+        _isStatsComputed = false; // cached cprob/quantile values are stale after any change
+        _totalCount++;
+        _sizeMap[size].count++;
+    }
+
+    /// filter high value outliers:
+    void
+    filterObservationsOverQuantile(const float prob);
+
+    typedef std::map<int, SizeData, std::greater<int> > map_type; // keyed by size, iterated from high to low
+
+private:
+    void
+    calcStats() const; // refreshes the cprob values and the quantile table
+
+    friend class boost::serialization::access;
+    template<class Archive>
+    void save(Archive& ar, const unsigned /*version*/) const
+    {
+        ar << boost::serialization::make_nvp("totalObservationCount", _totalCount);
+        unsigned mapSize(_sizeMap.size());
+        ar << boost::serialization::make_nvp("elementCount", mapSize);
+        // elements are written in increasing size order (the reverse of the high->low map order):
+        SizeMapXmlElement xe;
+        BOOST_REVERSE_FOREACH(const map_type::value_type& val, _sizeMap)
+        {
+            xe.size = val.first;
+            xe.count = val.second.count;
+            ar << boost::serialization::make_nvp("element", xe);
+        }
+    }
+
+    template<class Archive>
+    void load(Archive& ar, const unsigned /*version*/)
+    {
+        ar >> boost::serialization::make_nvp("totalObservationCount", _totalCount);
+        unsigned mapSize(0);
+        ar >> boost::serialization::make_nvp("elementCount", mapSize);
+        // only size and count are stored per element, cprob is rebuilt on the next stats query:
+        SizeMapXmlElement xe;
+        _sizeMap.clear();
+
+        for (unsigned i(0); i<mapSize; ++i)
+        {
+            ar >> boost::serialization::make_nvp("element", xe);
+            _sizeMap[xe.size].count = xe.count;
+        }
+        _isStatsComputed = false;
+    }
+
+    BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+    ///////////////////////////////////// data:
+
+    static const int _quantileNum = 1000; // number of bins in the quantile lookup table
+
+    mutable bool _isStatsComputed; // true once calcStats has run for the current observations
+    unsigned _totalCount; // total number of observations
+    mutable std::vector<int> _quantiles; // size value per quantile bin, filled by calcStats
+    mutable map_type _sizeMap; // per-size count and cprob; mutable so const queries can refresh cprob
+};
+
+BOOST_CLASS_IMPLEMENTATION(SizeDistribution, object_serializable)
+
+
+std::ostream&
+operator<<(std::ostream& os, const SizeDistribution& sd);
diff --git a/src/c++/lib/blt_util/align_path.cpp b/src/c++/lib/blt_util/align_path.cpp
new file mode 100644
index 0000000..7133f1c
--- /dev/null
+++ b/src/c++/lib/blt_util/align_path.cpp
@@ -0,0 +1,1025 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#include "blt_util/align_path.hh"
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "blt_util/parse_util.hh"
+#include "blt_util/seq_util.hh"
+
+#include "boost/foreach.hpp"
+#include "boost/lexical_cast.hpp"
+
+#include <cassert>
+
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+
+
+
+/// throw a blt_exception which reports the character at cptr and its 1-based position within cigar
+static
+void
+unknown_cigar_error(const char* const cigar, const char* const cptr)
+{
+    std::ostringstream oss;
+    oss << "ERROR: can't parse cigar string: " << cigar << "\n";
+    oss << "\tunexpected character: '" << *cptr << "' at position: " << (cptr-cigar+1) << "\n";
+    throw blt_exception(oss.str().c_str());
+}
+
+
+
+namespace ALIGNPATH
+{
+
+
+/// write the cigar string for apath into cigar (any previous content of cigar is replaced)
+void
+apath_to_cigar(const path_t& apath, std::string& cigar)
+{
+    cigar.clear();
+    for (const path_segment& segment : apath)
+    {
+        cigar.append(boost::lexical_cast<std::string>(segment.length));
+        cigar.push_back(segment_type_to_cigar_code(segment.type));
+    }
+}
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const path_t& apath)
+{
+    for (const path_segment& ps : apath)
+    {
+        os << ps.length << segment_type_to_cigar_code(ps.type); // segment length immediately followed by its cigar code
+    }
+    return os;
+}
+
+
+
+/// parse cigar into apath (previous apath content is discarded); PAD and zero-length segments are
+/// dropped and adjacent segments of the same type are merged
+/// throws blt_exception (via unknown_cigar_error) on any unexpected character
+void
+cigar_to_apath(const char* cigar,
+               path_t& apath)
+{
+    using illumina::blt_util::parse_unsigned;
+
+    assert(NULL != cigar);
+    apath.clear();
+
+    path_segment lps; // pending output segment, adjacent input segments of equal type accumulate here
+    const char* cptr(cigar);
+    while (*cptr)
+    {
+        path_segment ps;
+        // expect sequences of digits and cigar codes; the cast keeps isdigit well-defined for negative char values:
+        if (! isdigit(static_cast<unsigned char>(*cptr))) unknown_cigar_error(cigar,cptr);
+        ps.length = parse_unsigned(cptr); // presumably advances cptr past the digits, the next char is read as the code
+        ps.type = cigar_code_to_segment_type(*cptr);
+        if (ps.type == NONE) unknown_cigar_error(cigar,cptr);
+        cptr++;
+        if ((ps.type == PAD) || (ps.length == 0)) continue;
+        if (ps.type != lps.type)
+        {
+            if (lps.type != NONE) apath.push_back(lps);
+            lps = ps;
+        }
+        else
+        {
+            lps.length += ps.length;
+        }
+    }
+    if (lps.type != NONE) apath.push_back(lps);
+}
+
+
+
+/// sum of the lengths of all segments for which is_segment_type_read_length() holds
+unsigned
+apath_read_length(const path_t& apath)
+{
+    unsigned readLength(0);
+    for (const path_segment& segment : apath)
+    {
+        if (is_segment_type_read_length(segment.type)) readLength += segment.length;
+    }
+    return readLength;
+}
+
+/// sum of the lengths of all segments for which is_segment_align_match() holds
+unsigned
+apath_matched_length(const path_t& apath)
+{
+    unsigned matchedLength(0);
+    for (const path_segment& segment : apath)
+    {
+        if (is_segment_align_match(segment.type)) matchedLength += segment.length;
+    }
+    return matchedLength;
+}
+
+unsigned
+apath_spliced_length(const path_t& apath)
+{
+    unsigned skipLength(0); // running total over all SKIP segments
+    for (const path_segment& segment : apath)
+    {
+        skipLength += ((SKIP == segment.type) ? segment.length : 0);
+    }
+    return skipLength;
+}
+
+/// sum of the lengths of all segments for which is_segment_type_ref_length() holds
+unsigned
+apath_ref_length(const path_t& apath)
+{
+    unsigned refLength(0);
+    for (const path_segment& segment : apath)
+    {
+        if (is_segment_type_ref_length(segment.type)) refLength += segment.length;
+    }
+    return refLength;
+}
+
+
+
+/// test whether id is one of the segment types treated as part of the
+/// unaligned edge of a read: INSERT, HARD_CLIP or SOFT_CLIP
+static
+inline
+bool
+is_segment_type_unaligned_read_edge(const align_t id)
+{
+    // every other segment type is rejected
+    const bool isEdgeType(
+        (INSERT == id) ||
+        (HARD_CLIP == id) ||
+        (SOFT_CLIP == id));
+
+    return isEdgeType;
+}
+
+
+
+/// read length held by the run of unaligned-edge segments (insert/hard-clip/soft-clip) at the start of apath
+unsigned
+apath_read_lead_size(const path_t& apath)
+{
+    unsigned leadSize(0);
+    for (auto segIter(apath.begin()); (segIter != apath.end()) && is_segment_type_unaligned_read_edge(segIter->type); ++segIter)
+    {
+        if (is_segment_type_read_length(segIter->type)) leadSize += segIter->length;
+    }
+    return leadSize;
+}
+
+
+
+/// read length held by the run of unaligned-edge segments (insert/hard-clip/soft-clip) at the end of apath
+unsigned
+apath_read_trail_size(const path_t& apath)
+{
+    unsigned trailSize(0);
+    for (auto segIter(apath.rbegin()); (segIter != apath.rend()) && is_segment_type_unaligned_read_edge(segIter->type); ++segIter)
+    {
+        if (is_segment_type_read_length(segIter->type)) trailSize += segIter->length;
+    }
+    return trailSize;
+}
+
+
+
+/// sum of the SOFT_CLIP segment lengths at the start of apath
+///
+/// HARD_CLIP segments are stepped over without ending the scan, any other segment type ends it
+unsigned
+apath_soft_clip_lead_size(const path_t& apath)
+{
+    unsigned clipSize(0);
+
+    for (const path_segment& segment : apath)
+    {
+        // hard-clip neither counts nor stops the scan:
+        if (HARD_CLIP == segment.type) continue;
+
+        // the first segment which is not soft-clip ends the leading region:
+        if (SOFT_CLIP != segment.type) break;
+
+        clipSize += segment.length;
+    }
+
+    return clipSize;
+}
+
+
+
+/// sum of the SOFT_CLIP segment lengths at the end of apath
+///
+/// HARD_CLIP segments are stepped over without ending the scan, any other segment type ends it
+unsigned
+apath_soft_clip_trail_size(const path_t& apath)
+{
+    unsigned clipSize(0);
+
+    for (auto segIter(apath.rbegin()); segIter != apath.rend(); ++segIter)
+    {
+        // hard-clip neither counts nor stops the scan:
+        if (HARD_CLIP == segIter->type) continue;
+
+        // the first segment (from the end) which is not soft-clip ends the trailing region:
+        if (SOFT_CLIP != segIter->type) break;
+
+        clipSize += segIter->length;
+    }
+
+    return clipSize;
+}
+
+/// combined length of the hard and soft-clip segments found at the start of apath
+unsigned
+apath_clip_lead_size(const path_t& apath)
+{
+    unsigned clipSize(0);
+
+    for (const path_segment& segment : apath)
+    {
+        const bool isClip((HARD_CLIP == segment.type) || (SOFT_CLIP == segment.type));
+
+        // the leading clip region ends at the first segment of any other type:
+        if (! isClip) break;
+        clipSize += segment.length;
+    }
+
+    return clipSize;
+}
+
+/// combined length of the hard and soft-clip segments found at the end of apath
+unsigned
+apath_clip_trail_size(const path_t& apath)
+{
+    unsigned clipSize(0);
+
+    for (auto segIter(apath.rbegin()); segIter != apath.rend(); ++segIter)
+    {
+        const bool isClip((HARD_CLIP == segIter->type) || (SOFT_CLIP == segIter->type));
+
+        // the trailing clip region ends at the first segment (from the end) of any other type:
+        if (! isClip) break;
+        clipSize += segIter->length;
+    }
+
+    return clipSize;
+}
+
+unsigned
+apath_insert_lead_size(const path_t& apath)
+{
+    // sum insert length at the front of the path; clip segments (hard or soft)
+    // are stepped over without being counted, any other segment type ends the scan
+    unsigned insertSize(0);
+    for (const path_segment& seg : apath)
+    {
+        if ((HARD_CLIP == seg.type) || (SOFT_CLIP == seg.type)) continue;
+        if (INSERT != seg.type) break;
+        insertSize += seg.length;
+    }
+    return insertSize;
+}
+
+
+
+unsigned
+apath_insert_trail_size(const path_t& apath)
+{
+    // sum insert length at the back of the path; clip segments (hard or soft)
+    // are stepped over without being counted, any other segment type ends the scan
+    unsigned insertSize(0);
+    for (path_t::const_reverse_iterator segIter(apath.rbegin()); segIter != apath.rend(); ++segIter)
+    {
+        if ((HARD_CLIP == segIter->type) || (SOFT_CLIP == segIter->type)) continue;
+        if (INSERT != segIter->type) break;
+        insertSize += segIter->length;
+    }
+    return insertSize;
+}
+
+
+
+unsigned
+apath_indel_count(
+    const path_t& apath)
+{
+    // count maximal runs of consecutive INSERT/DELETE segments, so that an
+    // adjacent insertion/deletion combination is counted once
+    unsigned indelCount(0);
+    bool inIndelRun(false);
+    for (const path_segment& seg : apath)
+    {
+        const bool isIndelSegment((DELETE == seg.type) || (INSERT == seg.type));
+        if (isIndelSegment && (! inIndelRun)) ++indelCount;
+        inIndelRun = isIndelSegment;
+    }
+    return indelCount;
+}
+
+
+
+void
+apath_limit_ref_length(
+    const unsigned target_ref_length,
+    path_t& apath)
+{
+    // accumulate reference span segment by segment; once the target is reached
+    // the current segment is shortened by any overshoot and everything after
+    // it is removed. A path shorter than the target is left unchanged.
+    unsigned refSpan(0);
+
+    const unsigned segCount(apath.size());
+    for (unsigned segIndex(0); segIndex<segCount; ++segIndex)
+    {
+        path_segment& seg(apath[segIndex]);
+        if (is_segment_type_ref_length(seg.type))
+        {
+            refSpan += seg.length;
+            if (refSpan >= target_ref_length)
+            {
+                const unsigned overshoot(refSpan - target_ref_length);
+                if (overshoot > 0)
+                {
+                    assert(seg.length > overshoot);
+                    seg.length -= overshoot;
+                }
+                apath.resize(segIndex+1);
+                return;
+            }
+        }
+    }
+}
+
+
+// Trim apath in place so that it covers only the read interval
+// [target_read_start,target_read_end), measured over read-length segments.
+//
+// A segment straddling either boundary is shortened, and all segments before
+// the start segment or after the end segment are dropped. If the accumulated
+// read length never exceeds target_read_start the front of the path is kept,
+// and if it never reaches target_read_end the back of the path is kept.
+//
+void
+apath_limit_read_length(
+    const unsigned target_read_start,
+    const unsigned target_read_end,
+    path_t& apath)
+{
+    bool isStartSet(false);
+
+    // read_length is the cumulative read length up to and including the current segment
+    unsigned read_length(0);
+    const unsigned as(apath.size());
+    unsigned startSegment(0);
+    unsigned endSegment(as);
+    for (unsigned i(0); i<as; ++i)
+    {
+        path_segment& ps(apath[i]);
+        if (! is_segment_type_read_length(ps.type)) continue;
+        read_length += ps.length;
+
+        // first segment extending past target_read_start becomes the new path start:
+        if ((! isStartSet) && (read_length > target_read_start))
+        {
+            {
+                // extra is the portion of this segment lying before target_read_start
+                const unsigned extra(ps.length - (read_length - target_read_start));
+                assert(ps.length > extra);
+                ps.length -= extra;
+            }
+            startSegment=i;
+            isStartSet=true;
+        }
+
+        // first segment reaching target_read_end becomes the new path end:
+        if (read_length >= target_read_end)
+        {
+            if (read_length > target_read_end)
+            {
+                // extra is the portion of this segment lying at or beyond target_read_end
+                const unsigned extra(read_length - target_read_end);
+                assert(ps.length > extra);
+                ps.length -= extra;
+            }
+            endSegment=i+1;
+            break;
+        }
+    }
+    // keep only segments [startSegment,endSegment)
+    apath = path_t(apath.begin()+startSegment,apath.begin()+endSegment);
+}
+
+
+void
+apath_append(
+    path_t& apath,
+    const align_t seg_type,
+    const unsigned length)
+{
+    // extend the last segment when it already has the requested type,
+    // otherwise start a new segment at the end of the path
+    const bool isExtendLast((! apath.empty()) && (apath.back().type == seg_type));
+    if (isExtendLast)
+    {
+        apath.back().length += length;
+        return;
+    }
+    apath.emplace_back(seg_type,length);
+}
+
+
+
+void
+apath_clip_clipper(path_t& apath,
+                   unsigned& hc_lead,
+                   unsigned& hc_trail,
+                   unsigned& sc_lead,
+                   unsigned& sc_trail)
+{
+    // strip all hard/soft-clip segments from apath and report their summed
+    // lengths; clip seen before the first non-clip segment is counted as lead,
+    // clip seen afterwards as trail
+    hc_lead=0;
+    hc_trail=0;
+    sc_lead=0;
+    sc_trail=0;
+
+    bool isLead(true);
+    path_t unclipped;
+    for (const path_segment& seg : apath)
+    {
+        switch (seg.type)
+        {
+        case HARD_CLIP:
+            (isLead ? hc_lead : hc_trail) += seg.length;
+            break;
+        case SOFT_CLIP:
+            (isLead ? sc_lead : sc_trail) += seg.length;
+            break;
+        default:
+            isLead=false;
+            // a non-clip segment is not expected once trailing clip has been seen
+            assert(0==hc_trail);
+            assert(0==sc_trail);
+            unclipped.push_back(seg);
+            break;
+        }
+    }
+    apath=unclipped;
+}
+
+
+
+void
+apath_clip_adder(path_t& apath,
+                 const unsigned hc_lead,
+                 const unsigned hc_trail,
+                 const unsigned sc_lead,
+                 const unsigned sc_trail)
+{
+    // rebuild the path as: [hard-clip][soft-clip] <original path> [soft-clip][hard-clip],
+    // leaving out any clip segment whose requested length is zero
+    path_t clipped;
+    if (hc_lead>0) clipped.emplace_back(HARD_CLIP,hc_lead);
+    if (sc_lead>0) clipped.emplace_back(SOFT_CLIP,sc_lead);
+    clipped.insert(clipped.end(),apath.begin(),apath.end());
+    if (sc_trail>0) clipped.emplace_back(SOFT_CLIP,sc_trail);
+    if (hc_trail>0) clipped.emplace_back(HARD_CLIP,hc_trail);
+    apath=clipped;
+}
+
+
+
+// 1. remove zero-length segments
+// 2. remove pads
+// 3. condense repeated segment types
+// 4. reduce adjacent insertion/deletion tags to a single pair
+// 5. replace NDN pattern with single SKIP segment
+//
+// Step 5 is evaluated on the path as it looks after steps 1-4, so that chained
+// patterns (e.g. NDNDN) and patterns interrupted by a removed segment collapse
+// to one SKIP instead of leaving adjacent SKIP segments in the output.
+//
+// return true if path has been altered
+//
+bool
+apath_cleaner(path_t& apath)
+{
+    bool is_cleaned(false);
+    const unsigned as(apath.size());
+
+    // each index refers to the segment currently accumulating length for its
+    // category, the value 'as' means no such segment is open:
+    unsigned insertIndex(as);
+    unsigned deleteIndex(as);
+    unsigned otherIndex(as);
+
+    // steps 1-4: segments to be removed are marked by setting their length to zero
+    for (unsigned i(0); i<as; ++i)
+    {
+        path_segment& ps(apath[i]);
+        if       (ps.length == 0)
+        {
+            is_cleaned = true;
+        }
+        else if (ps.type == PAD)
+        {
+            ps.length = 0;
+            is_cleaned = true;
+        }
+        else if (ps.type == INSERT)
+        {
+            if (insertIndex < as)
+            {
+                apath[insertIndex].length += ps.length;
+                ps.length = 0;
+                is_cleaned = true;
+            }
+            else
+            {
+                insertIndex = i;
+            }
+        }
+        else if (ps.type == DELETE)
+        {
+            if (deleteIndex < as)
+            {
+                apath[deleteIndex].length += ps.length;
+                ps.length = 0;
+                is_cleaned = true;
+            }
+            else
+            {
+                deleteIndex = i;
+            }
+        }
+        else
+        {
+            // a non-indel segment closes any open indel run, and segments on
+            // opposite sides of an indel run must not be merged with each other:
+            if ((insertIndex<as) || (deleteIndex<as))
+            {
+                insertIndex = as;
+                deleteIndex = as;
+                otherIndex = as;
+            }
+            if ((otherIndex < as) && (apath[otherIndex].type == ps.type))
+            {
+                apath[otherIndex].length += ps.length;
+                ps.length = 0;
+                is_cleaned = true;
+            }
+            else
+            {
+                otherIndex = i;
+            }
+        }
+    }
+
+    // compact the path in place (dropping zero-length segments) and convert
+    // NDN to single N against the compacted prefix. writeIndex never exceeds i,
+    // so no segment is overwritten before it has been read.
+    unsigned writeIndex(0);
+    for (unsigned i(0); i<as; ++i)
+    {
+        const path_segment ps(apath[i]);
+        if (ps.length == 0) continue;
+
+        if ((ps.type == SKIP) && (writeIndex >= 2) &&
+            (apath[writeIndex-1].type == DELETE) &&
+            (apath[writeIndex-2].type == SKIP))
+        {
+            // fold the deletion and this skip into the preceding skip:
+            apath[writeIndex-2].length += (apath[writeIndex-1].length + ps.length);
+            --writeIndex;
+            is_cleaned = true;
+            continue;
+        }
+
+        apath[writeIndex] = ps;
+        ++writeIndex;
+    }
+    apath.resize(writeIndex);
+
+    return is_cleaned;
+}
+
+
+
+void
+apath_clean_seqmatch(path_t& apath)
+{
+    // rewrite every align-match type segment as MATCH, merging consecutive
+    // ones into a single segment; all other segments are copied unchanged
+    path_t merged;
+    bool isPrevMatch(false);
+    for (const path_segment& seg : apath)
+    {
+        const bool isMatch(is_segment_align_match(seg.type));
+        if (isMatch && isPrevMatch)
+        {
+            merged.back().length += seg.length;
+        }
+        else if (isMatch)
+        {
+            merged.emplace_back(MATCH,seg.length);
+        }
+        else
+        {
+            merged.push_back(seg);
+        }
+        isPrevMatch = isMatch;
+    }
+
+    apath = merged;
+}
+
+// disabled code, excluded from compilation by the preprocessor:
+//
+// returns the indices of the first and last segments which are neither
+// soft-clip nor hard-clip; both values equal the path size if no such
+// segment exists
+#if 0
+std::pair<unsigned,unsigned>
+get_nonclip_end_segments(const path_t& apath)
+{
+    const unsigned as(apath.size());
+    std::pair<unsigned,unsigned> res(as,as);
+    bool is_first_nonclip(false);
+    for (unsigned i(0); i<as; ++i)
+    {
+        const path_segment& ps(apath[i]);
+        if (! (ps.type == SOFT_CLIP ||
+               ps.type == HARD_CLIP))
+        {
+            if (! is_first_nonclip)
+            {
+                res.first=i;
+                is_first_nonclip=true;
+            }
+            res.second=i;
+        }
+    }
+    return res;
+}
+#endif
+
+
+pos_range
+get_nonclip_range(const path_t& apath)
+{
+    // the range begins at the read offset of the first non-clip segment and
+    // ends at the read offset following the last non-clip segment
+    pos_range nonclipRange;
+    unsigned readOffset(0);
+    for (const path_segment& seg : apath)
+    {
+        const unsigned readSpan(is_segment_type_read_length(seg.type) ? seg.length : 0);
+        const bool isClip((seg.type == SOFT_CLIP) || (seg.type == HARD_CLIP));
+        if (! isClip)
+        {
+            if (! nonclipRange.is_begin_pos) nonclipRange.set_begin_pos(readOffset);
+            nonclipRange.set_end_pos(readOffset + readSpan);
+        }
+        readOffset += readSpan;
+    }
+    return nonclipRange;
+}
+
+
+
+std::pair<unsigned,unsigned>
+get_match_edge_segments(const path_t& apath)
+{
+    // both members keep the path size as a 'not found' value when the path
+    // contains no align-match segment
+    const unsigned segCount(apath.size());
+    std::pair<unsigned,unsigned> edges(segCount,segCount);
+    for (unsigned segIndex(0); segIndex<segCount; ++segIndex)
+    {
+        if (! is_segment_align_match(apath[segIndex].type)) continue;
+        if (edges.first == segCount) edges.first = segIndex;
+        edges.second = segIndex;
+    }
+    return edges;
+}
+
+
+
+unsigned
+apath_exon_count(const path_t& apath)
+{
+    // one exon, plus one more for every SKIP segment in the path
+    unsigned skipCount(0);
+    for (const path_segment& seg : apath)
+    {
+        if (SKIP == seg.type) ++skipCount;
+    }
+    return (1 + skipCount);
+}
+
+
+
+bool
+is_clipped(const path_t& apath)
+{
+    // true if the first or the last segment is a soft-clip or hard-clip
+    if (apath.empty()) return false;
+
+    const align_t frontType(apath.front().type);
+    const align_t backType(apath.back().type);
+    return ((frontType == SOFT_CLIP) || (frontType == HARD_CLIP) ||
+            (backType == SOFT_CLIP) || (backType == HARD_CLIP));
+}
+
+bool
+is_clipped_front(const path_t& apath)
+{
+    // true if the first segment is a soft-clip or hard-clip
+    if (apath.empty()) return false;
+
+    const align_t frontType(apath.front().type);
+    return ((frontType == SOFT_CLIP) || (frontType == HARD_CLIP));
+}
+
+
+
+unsigned
+get_clip_len(const path_t& apath)
+{
+    // the first segment takes priority: its length is returned if it is a clip,
+    // otherwise the last segment is checked; zero is returned if neither is a clip
+    if (apath.empty()) return 0;
+
+    const path_segment& frontSeg(apath.front());
+    if ((frontSeg.type == SOFT_CLIP) || (frontSeg.type == HARD_CLIP)) return frontSeg.length;
+
+    const path_segment& backSeg(apath.back());
+    if ((backSeg.type == SOFT_CLIP) || (backSeg.type == HARD_CLIP)) return backSeg.length;
+
+    return 0;
+}
+
+
+
+bool
+is_soft_clipped(const path_t& apath)
+{
+    // true if a soft-clip segment occurs anywhere in the path
+    bool isFound(false);
+    for (const path_segment& seg : apath)
+    {
+        if (seg.type == SOFT_CLIP)
+        {
+            isFound = true;
+            break;
+        }
+    }
+    return isFound;
+}
+
+
+
+bool
+is_edge_readref_len_segment(const path_t& apath)
+{
+    const unsigned segCount(apath.size());
+    if (segCount==0) return false;
+
+    const std::pair<unsigned,unsigned> matchEdges(get_match_edge_segments(apath));
+
+    // at this point we assume the alignment has been sanity checked for legal clipping,
+    // where hard-clip is only on the outside, next soft-clipping, then anything else...
+    //
+    // look at every segment lying before the first or after the last match segment:
+    for (unsigned segIndex(0); segIndex<segCount; ++segIndex)
+    {
+        const bool isInsideMatchSpan((segIndex>=matchEdges.first) && (segIndex<=matchEdges.second));
+        if (isInsideMatchSpan) continue;
+
+        switch (apath[segIndex].type)
+        {
+        case INSERT:
+        case DELETE:
+        case SKIP:
+        case SOFT_CLIP:
+            return true;
+        default:
+            break;
+        }
+    }
+    return false;
+}
+
+
+
+bool
+is_seq_swap(const path_t& apath)
+{
+    // true if any two neighboring segments are both indel types
+    const unsigned segCount(apath.size());
+    for (unsigned nextIndex(1); nextIndex<segCount; ++nextIndex)
+    {
+        const bool isPrevIndel(is_segment_type_indel(apath[nextIndex-1].type));
+        if (isPrevIndel && is_segment_type_indel(apath[nextIndex].type)) return true;
+    }
+    return false;
+}
+
+
+
+bool
+is_segment_swap_start(
+    const path_t& apath,
+    unsigned i)
+{
+    // scan the run of consecutive INSERT/DELETE segments beginning at segment i
+    // and report whether that run contains at least one of each type
+    bool isInsertSeen(false);
+    bool isDeleteSeen(false);
+
+    for (const unsigned segCount(apath.size()); i<segCount; ++i)
+    {
+        const align_t segType(apath[i].type);
+        if (segType == INSERT)
+        {
+            isInsertSeen=true;
+        }
+        else if (segType == DELETE)
+        {
+            isDeleteSeen=true;
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    return (isInsertSeen && isDeleteSeen);
+}
+
+
+
+bool
+is_apath_floating(const path_t& apath)
+{
+    // a path is floating when it holds no align-match type segment at all
+    bool isFloating(true);
+    for (const path_segment& seg : apath)
+    {
+        if (is_segment_align_match(seg.type))
+        {
+            isFloating = false;
+            break;
+        }
+    }
+    return isFloating;
+}
+
+
+std::string
+get_apath_invalid_reason(const path_t& apath,
+                         const unsigned seq_length)
+{
+    const ALIGN_ISSUE::issue_t issue(get_apath_invalid_type(apath,seq_length));
+
+    // every issue other than a length mismatch uses its fixed description:
+    if (ALIGN_ISSUE::LENGTH != issue)
+    {
+        return std::string(ALIGN_ISSUE::description(issue));
+    }
+
+    // a length mismatch reports both of the conflicting lengths:
+    std::ostringstream oss;
+    oss << "alignment length (" << apath_read_length(apath) << ") does not match read length (" << seq_length << ")";
+    return oss.str();
+}
+
+
+
+// Run a series of validity checks on apath and return the first issue found,
+// or ALIGN_ISSUE::NONE if every check passes. seq_length is the read length
+// which the read-length segments of the path are required to add up to.
+//
+ALIGN_ISSUE::issue_t
+get_apath_invalid_type(const path_t& apath,
+                       const unsigned seq_length)
+{
+    // is_match becomes true once the first align-match segment has been passed
+    bool is_match(false);
+    align_t last_type(NONE);
+    const unsigned as(apath.size());
+
+    // forward pass over all segments:
+    for (unsigned i(0); i<as; ++i)
+    {
+        const path_segment& ps(apath[i]);
+
+        if (ps.type==NONE) return ALIGN_ISSUE::UNKNOWN_SEGMENT;
+        if ((i!=0) && ps.type==last_type) return ALIGN_ISSUE::REPEATED_SEGMENT;
+
+        // a skip occurring before the first match segment is on the leading edge:
+        if (! is_match)
+        {
+            if (ps.type==SKIP) return ALIGN_ISSUE::EDGE_SKIP;
+        }
+
+        // hard-clip is accepted only as the first or last segment:
+        if (ps.type==HARD_CLIP)
+        {
+            if (! ((i==0) || ((i+1)==as))) return ALIGN_ISSUE::CLIPPING;
+        }
+
+        // soft-clip is accepted as the first or last segment, or as the second/second-to-last
+        // segment when the neighboring outermost segment is a hard-clip:
+        if (ps.type==SOFT_CLIP)
+        {
+            if (! ((i==0) || ((i+1)==as)))
+            {
+                if (i==1)
+                {
+                    if (as==3)
+                    {
+                        // in a three segment path the middle segment is adjacent to both edges,
+                        // so a hard-clip on either side is sufficient
+                        if ((apath[0].type != HARD_CLIP) && (apath[i+1].type != HARD_CLIP)) return ALIGN_ISSUE::CLIPPING;
+                    }
+                    else
+                    {
+                        if (apath[0].type != HARD_CLIP) return ALIGN_ISSUE::CLIPPING;
+                    }
+                }
+                else if ((i+2)==as)
+                {
+                    if (apath[i+1].type != HARD_CLIP) return ALIGN_ISSUE::CLIPPING;
+                }
+                else
+                {
+                    return ALIGN_ISSUE::CLIPPING;
+                }
+            }
+        }
+
+        if ((! is_match) && (is_segment_align_match(ps.type))) is_match=true;
+
+        last_type=ps.type;
+    }
+
+    if (! is_match) return ALIGN_ISSUE::FLOATING;
+
+    // run in reverse to finish checking condition (2a):
+    // (scan backward from the last segment until the last match segment is found)
+    for (unsigned i(0); i<as; ++i)
+    {
+        const path_segment& ps(apath[as-(i+1)]);
+        if (is_segment_align_match(ps.type)) break;
+        //if(ps.type==DELETE) return ALIGN_ISSUE::EDGE_DELETE;
+        if (ps.type==SKIP) return ALIGN_ISSUE::EDGE_SKIP;
+    }
+
+    if (seq_length != apath_read_length(apath)) return ALIGN_ISSUE::LENGTH;
+
+    return ALIGN_ISSUE::NONE;
+}
+
+
+
+// Unlike the above function, which tests for invalid alignment paths,
+// this function tests for valid alignment paths which starling
+// simply cannot handle. At present this is any path containing a PAD segment.
+//
+bool
+is_apath_starling_invalid(const path_t& apath)
+{
+    bool isPadFound(false);
+    for (const path_segment& seg : apath)
+    {
+        if (PAD == seg.type)
+        {
+            isPadFound = true;
+            break;
+        }
+    }
+    return isPadFound;
+}
+
+
+}  // namespace ALIGNPATH
diff --git a/src/c++/lib/blt_util/align_path.hh b/src/c++/lib/blt_util/align_path.hh
new file mode 100644
index 0000000..15dfe9b
--- /dev/null
+++ b/src/c++/lib/blt_util/align_path.hh
@@ -0,0 +1,560 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/pos_range.hh"
+#include "blt_util/known_pos_range2.hh"
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+/// process export MD and output the alignment information in a format
+/// which will not need to change significantly for SAM/BAM (by
+/// providing essentially a parsed forward strand CIGAR string)
+///
+/// idea is for this code to migrate to some kind of joint export/sam api code
+///
+namespace ALIGNPATH
+{
+
+/// alignment path segment types, the CIGAR code of each type is given by
+/// segment_type_to_cigar_code()
+enum align_t
+{
+    NONE,          ///< no/unknown segment type (result of converting an unrecognized CIGAR code)
+    MATCH,         ///< CIGAR 'M'
+    INSERT,        ///< CIGAR 'I'
+    DELETE,        ///< CIGAR 'D'
+    SKIP,          ///< CIGAR 'N'
+    SOFT_CLIP,     ///< CIGAR 'S'
+    HARD_CLIP,     ///< CIGAR 'H'
+    PAD,           ///< CIGAR 'P'
+    SEQ_MATCH,     ///< CIGAR '='
+    SEQ_MISMATCH   ///< CIGAR 'X'
+};
+
+/// \return the CIGAR character for segment type \p id
+///
+/// any type without a CIGAR code of its own (such as NONE) is reported as 'X',
+/// the same character which is used for SEQ_MISMATCH
+inline
+char
+segment_type_to_cigar_code(const align_t id)
+{
+    switch (id)
+    {
+    case MATCH        : return 'M';
+    case INSERT       : return 'I';
+    case DELETE       : return 'D';
+    case SKIP         : return 'N';
+    case SOFT_CLIP    : return 'S';
+    case HARD_CLIP    : return 'H';
+    case PAD          : return 'P';
+    case SEQ_MATCH    : return '=';
+    case SEQ_MISMATCH : return 'X';
+    default           : return 'X';
+    }
+}
+
+/// \return the segment type for CIGAR character \p c, or NONE if the
+/// character is not a recognized CIGAR code
+inline
+align_t
+cigar_code_to_segment_type(const char c)
+{
+    switch (c)
+    {
+    case 'M' : return MATCH;
+    case 'I' : return INSERT;
+    case 'D' : return DELETE;
+    case 'N' : return SKIP;
+    case 'S' : return SOFT_CLIP;
+    case 'H' : return HARD_CLIP;
+    case 'P' : return PAD;
+    case '=' : return SEQ_MATCH;
+    case 'X' : return SEQ_MISMATCH;
+    default  : return NONE;
+    }
+}
+
+/// \return true for segment types counted towards read length
+/// (MATCH, INSERT, SOFT_CLIP, SEQ_MATCH, SEQ_MISMATCH)
+inline
+bool
+is_segment_type_read_length(const align_t id)
+{
+    return ((id == MATCH) ||
+            (id == INSERT) ||
+            (id == SOFT_CLIP) ||
+            (id == SEQ_MATCH) ||
+            (id == SEQ_MISMATCH));
+}
+
+/// \return true for segment types counted towards reference length
+/// (MATCH, DELETE, SKIP, SEQ_MATCH, SEQ_MISMATCH)
+inline
+bool
+is_segment_type_ref_length(const align_t id)
+{
+    return ((id == MATCH) ||
+            (id == DELETE) ||
+            (id == SKIP) ||
+            (id == SEQ_MATCH) ||
+            (id == SEQ_MISMATCH));
+}
+
+/// \return true for the align-match segment types (MATCH, SEQ_MATCH, SEQ_MISMATCH)
+inline
+bool
+is_segment_align_match(const align_t id)
+{
+    return ((id == MATCH) ||
+            (id == SEQ_MATCH) ||
+            (id == SEQ_MISMATCH));
+}
+
+/// \return true for the indel segment types (INSERT, DELETE)
+inline
+bool
+is_segment_type_indel(const align_t id)
+{
+    return ((id == INSERT) || (id == DELETE));
+}
+
+/// a single alignment path segment: a segment type together with its length
+struct path_segment
+{
+    path_segment(const align_t t = NONE,
+                 const unsigned l = 0)
+        : type(t)
+        , length(l)
+    {}
+
+    /// reset to the default constructed state (NONE, zero length)
+    void
+    clear()
+    {
+        type=NONE;
+        length=0;
+    }
+
+    bool
+    operator==(const path_segment& rhs) const
+    {
+        return ((type==rhs.type) && (length==rhs.length));
+    }
+
+    // arbitrary ordering which lets us look up from a set of alignments:
+    // (segments are ordered by type first, then by length)
+    bool
+    operator<(const path_segment& rhs) const
+    {
+        if (type!=rhs.type) return (type<rhs.type);
+        return (length<rhs.length);
+    }
+
+    align_t type;
+    unsigned length;
+};
+
+typedef std::vector<path_segment> path_t;
+
+std::ostream& operator<<(std::ostream& os, const path_t& apath);
+
+void
+apath_to_cigar(const path_t& apath,
+               std::string& cigar);
+
+/// convenience overload: \return the string produced by the two argument
+/// version of apath_to_cigar for \p apath
+inline
+std::string
+apath_to_cigar(const path_t& apath)
+{
+    std::string cigar;
+    apath_to_cigar(apath,cigar);
+    return cigar;
+}
+
+/// Convert cigar string into apath format
+///
+/// any padding in the CIGAR string is removed
+void
+cigar_to_apath(const char* cigar,
+               path_t& apath);
+
+/// \return the read length spanned by the path
+unsigned
+apath_read_length(const path_t& apath);
+
+/// \return the reference length spanned by the path
+unsigned
+apath_ref_length(const path_t& apath);
+
+/// \return the number of aligned (matched or mismatched) bases in the path
+unsigned
+apath_matched_length(const path_t& apath);
+
+/// \return the number of refskip (e.g. RNA spliced) bases in the path
+unsigned
+apath_spliced_length(const path_t& apath);
+
+/// how much unaligned sequence (soft_clip or insert) occurs before the first aligned base?
+unsigned
+apath_read_lead_size(const path_t& apath);
+
+/// how much unaligned sequence (soft_clip or insert) occurs after the last aligned base?
+unsigned
+apath_read_trail_size(const path_t& apath);
+
+/// how much soft_clip occurs before the first aligned base?
+unsigned
+apath_soft_clip_lead_size(const path_t& apath);
+
+/// how much soft_clip occurs after the last aligned base?
+unsigned
+apath_soft_clip_trail_size(const path_t& apath);
+
+/// how much clip (soft or hard) occurs before the first aligned base?
+unsigned
+apath_clip_lead_size(const path_t& apath);
+
+/// how much clip (soft or hard) occurs after the last aligned base?
+unsigned
+apath_clip_trail_size(const path_t& apath);
+
+
+/// how much insert occurs before the first aligned base?
+unsigned
+apath_insert_lead_size(const path_t& apath);
+
+/// how much insert occurs after the last aligned base?
+unsigned
+apath_insert_trail_size(const path_t& apath);
+
+/// how many indels are in the alignment?
+///
+/// combinations of adjacent I and D segments are counted
+/// as one indel
+unsigned
+apath_indel_count(const path_t& apath);
+
+/// append segment to end of apath
+void
+apath_append(
+    path_t& apath,
+    const align_t seg_type,
+    const unsigned length = 1);
+
+/// trim the end off of the alignment so that the reference span
+/// is no greater than target_ref_length. The edited path could contain
+/// edge deletions
+///
+void
+apath_limit_ref_length(
+    const unsigned target_ref_length,
+    path_t& apath);
+
+/// trim the start and end off of the alignment so that only the read span
+/// from target_read_start up to (but not including) target_read_end is
+/// retained. The edited path could contain edge insertions
+///
+void
+apath_limit_read_length(
+    const unsigned target_read_start,
+    const unsigned target_read_end,
+    path_t& apath);
+
+/// range based overload of apath_limit_read_length
+///
+/// the begin and end positions of \p target_read_range are used as the read
+/// start and end, any negative position is raised to zero first
+inline
+void
+apath_limit_read_length(
+    const known_pos_range2& target_read_range,
+    path_t& apath)
+{
+    apath_limit_read_length(
+        static_cast<unsigned>(std::max(target_read_range.begin_pos(),0)),
+        static_cast<unsigned>(std::max(target_read_range.end_pos(),0)),
+        apath);
+}
+
+/// remove any edge clip from apath and return the amount
+/// removed from each side. if ambiguous, lead is favored over trail
+void
+apath_clip_clipper(path_t& apath,
+                   unsigned& hc_lead,
+                   unsigned& hc_trail,
+                   unsigned& sc_lead,
+                   unsigned& sc_trail);
+
+/// adds lead clip to front of alignment and trail clip
+/// to back -- assumes no clipping exists on the path already.
+///
+void
+apath_clip_adder(path_t& apath,
+                 const unsigned hc_lead,
+                 const unsigned hc_trail,
+                 const unsigned sc_lead,
+                 const unsigned sc_trail);
+
+/// 'cleans' the path so that it can be used, or used more consistently
+///
+/// note this does not try to correct or work around anything
+/// which can't be unambiguously reinterpreted to a simpler form
+///
+/// 1. remove zero length alignment segments
+/// 2. remove pad segments
+/// 3. remove repeated segments
+/// 4. for any combined insertion/deletion pair, reduce this to
+///    a single segment pair (in either order)
+/// 5. replace any skip-delete-skip (NDN) pattern with a single skip segment
+///
+///  \return true if path has been altered
+///
+bool
+apath_cleaner(path_t& apath);
+
+/// Convert any cigar string using the seq_match/seq_mismatch operators (=/X) to the
+/// more widely accepted align match "M"
+void
+apath_clean_seqmatch(path_t& apath);
+
+/// convert the input alignpath to use seq match '=' and mismatch 'X' instead of align-match 'M'
+///
+template <typename symIter1,typename symIter2>
+void
+apath_add_seqmatch(
+    const symIter1 queryBegin,
+    const symIter1 queryEnd,
+    const symIter2 refBegin,
+    const symIter2 refEnd,
+    path_t& apath);
+
+
+#if 0
+// Get the match descriptor segment numbers for the first and last
+// non-soft/hard clipped segments. Return total segment size on
+// error.
+std::pair<unsigned,unsigned>
+get_nonclip_end_segments(const path_t& apath);
+#endif
+
+/// return the read coordinate range after clipping:
+pos_range
+get_nonclip_range(const path_t& apath);
+
+/// Get the match descriptor segment numbers for the first and last
+/// match segments. Return total segment size on error.
+std::pair<unsigned,unsigned>
+get_match_edge_segments(const path_t& apath);
+
+unsigned
+apath_exon_count(const path_t& apath);
+
+/// provide reference offsets for the beginning of each exon:
+///
+/// offset() is zero after construction. Each call to next() scans forward
+/// through the path, adding up reference length, and returns true once it has
+/// stepped past a SKIP segment onto the following segment; offset() then gives
+/// the reference length accumulated up to that segment. next() returns false
+/// when the end of the path is reached instead.
+///
+/// The object stores a reference to the path and its size at construction, so
+/// the path must outlive this object and should not be changed while it is in use.
+///
+struct exon_offsets
+{
+    exon_offsets(const path_t& apath)
+        : _apath(apath)
+        , _asize(apath.size())
+        , _offset(0)
+        , _segment(0)
+    {}
+
+    /// advance to the segment following the next SKIP, \return false if the
+    /// end of the path is reached first
+    bool
+    next()
+    {
+        bool is_break_next(false);
+        for (; _segment<_asize; ++_segment)
+        {
+            // stop on the first segment after a SKIP, before adding its length:
+            if (is_break_next) return true;
+            const path_segment& ps(_apath[_segment]);
+            if (ps.type==SKIP) is_break_next=true;
+            if (is_segment_type_ref_length(ps.type)) _offset += ps.length;
+        }
+        return false;
+    }
+
+    /// \return reference length accumulated over all segments scanned so far
+    unsigned
+    offset() const
+    {
+        return _offset;
+    }
+
+private:
+    const path_t& _apath;
+    const unsigned _asize;
+    unsigned _offset;  // reference length of segments [0,_segment)
+    unsigned _segment; // index of the next segment to scan
+};
+
+/// does the alignment contain any soft-clipped segments?
+bool
+is_soft_clipped(const path_t& apath);
+
+/// is either edge of the alignment soft-clipped or hard-clipped?
+bool
+is_clipped(const path_t& apath);
+
+/// is the first edge of the alignment soft-clipped or hard-clipped?
+bool
+is_clipped_front(const path_t& apath);
+
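+/// return the length of the first segment if it is a soft-clip or hard-clip, otherwise
+/// the length of the last segment if that is a soft-clip or hard-clip, otherwise 0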
+unsigned
+get_clip_len(const path_t& apath);
+
+/// does either edge of the alignment
+/// contain a segment which impacts read length or reference positions?
+/// (INSERT,DELETE,SKIP,SOFT_CLIP)
+///
+/// Note: "edge" is defined as any segment with match segments to only one side
+/// Note: edge HARD_CLIP, PAD, etc.. are ignored
+///
+bool
+is_edge_readref_len_segment(const path_t& apath);
+
+/// does alignment contain an adjacent insertion/deletion event?
+///
+bool
+is_seq_swap(const path_t& apath);
+
+/// is the given segment the beginning of a seq swap?
+bool
+is_segment_swap_start(const path_t& apath,
+                      const unsigned i);
+
+/// test if alignment has no match:
+bool
+is_apath_floating(const path_t& apath);
+
+
+/// categories of problem which can be reported for an alignment path
+namespace ALIGN_ISSUE
+{
+enum issue_t
+{
+    NONE,
+    CLIPPING,
+    EDGE_DELETE,
+    EDGE_SKIP,
+    UNKNOWN_SEGMENT,
+    REPEATED_SEGMENT,
+    FLOATING,
+    LENGTH
+};
+
+/// \return a fixed description of issue \p i, any value without
+/// a description of its own (including NONE) yields "no error"
+inline
+const char*
+description(const issue_t i)
+{
+    switch (i)
+    {
+    case CLIPPING         : return "alignment contains invalid clipping";
+    case EDGE_DELETE      : return "deletion on alignment edge";
+    case EDGE_SKIP        : return "skip on alignment edge";
+    case UNKNOWN_SEGMENT  : return "unknown segment in alignment";
+    case REPEATED_SEGMENT : return "alignment contains repeated segment";
+    case FLOATING         : return "alignment contains no match segments";
+    case LENGTH           : return "alignment length does not match read length";
+    default               : return "no error";
+    }
+}
+}
+
+
+/// Take a shot at the relatively simple stuff:
+///
+/// 1) clipping only occurs on the edge and hardclip must occur outside of soft-clip
+/// 2) skip cannot occur on edge
+///   2a) skip cannot occur with only non-match segments (e.g. insert and clip) connecting it to edge
+///   Note the corresponding check for deletions on the alignment edge is currently disabled.
+/// 3) no unknown segments
+/// 4) no repeated segments
+///      Note this might semi-legitimately occur where padding is stripped out of an alignment.
+/// 5) must contain at least one match segment
+/// 6) the read length of the path must equal seq_length
+///
+ALIGN_ISSUE::issue_t
+get_apath_invalid_type(const path_t& path,
+                       const unsigned seq_length);
+
+/// if is_apath_invalid returns true, this supplies a string describing the problem
+std::string
+get_apath_invalid_reason(const path_t& apath,
+                         const unsigned seq_length);
+
+/// boolean form of get_apath_invalid_type
+///
+/// \return true if any issue is reported for \p apath
+inline
+bool
+is_apath_invalid(const path_t& apath,
+                 const unsigned seq_length)
+{
+    const ALIGN_ISSUE::issue_t issue(get_apath_invalid_type(apath,seq_length));
+    return (issue != ALIGN_ISSUE::NONE);
+}
+
+/// check for conditions on an otherwise valid path which starling
+/// does not handle:
+/// TODO: move  this into starling-specific library
+bool
+is_apath_starling_invalid(const path_t& apath);
+
+#if 0
+normalize_path();
+#endif
+}
+
+
+#include "align_path_impl.hh"
+
diff --git a/src/c++/lib/blt_util/align_path_impl.hh b/src/c++/lib/blt_util/align_path_impl.hh
new file mode 100644
index 0000000..af95d3e
--- /dev/null
+++ b/src/c++/lib/blt_util/align_path_impl.hh
@@ -0,0 +1,89 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include <iterator>
+
+#include "blt_util/blt_exception.hh"
+
+
+namespace ALIGNPATH
+{
+
+
+/// convert the input path to use "seq match" state '=' and "seq mismatch" state 'X' instead of "alignment match" state 'M';
+/// an 'N' in either the query or the reference always counts as a mismatch.
+/// throws blt_exception if a match segment extends past queryEnd or refEnd; non-match segments are copied through unchanged.
+template <typename symIter1, typename symIter2>
+void
+apath_add_seqmatch(
+    const symIter1 queryBegin,
+    const symIter1 queryEnd,
+    const symIter2 refBegin,
+    const symIter2 refEnd,
+    path_t& apath)
+{
+    path_t apath2; // rewritten path; assigned back to apath at the end
+
+    symIter1 queryIndex(queryBegin);
+    symIter2 refIndex(refBegin);
+
+    const unsigned as(apath.size());
+    for (unsigned segmentIndex(0); segmentIndex<as; ++segmentIndex)
+    {
+        const path_segment& ps(apath[segmentIndex]);
+        if (is_segment_align_match(ps.type))
+        {
+            for (unsigned segmentPos(0); segmentPos<ps.length; ++segmentPos)
+            {
+                if (queryIndex >= queryEnd) // NOTE(review): '>=' requires iterator types that support ordering comparison
+                {
+                    throw blt_exception("apath_add_seqmatch: past end of query\n");
+                }
+
+                if (refIndex >= refEnd)
+                {
+                    throw blt_exception("apath_add_seqmatch: past end of reference\n");
+                }
+
+                bool isSeqMatch((*queryIndex) == (*refIndex));
+                if ((*queryIndex == 'N') || (*refIndex == 'N')) isSeqMatch = false;
+                apath_append(apath2, ( isSeqMatch ? SEQ_MATCH : SEQ_MISMATCH)); // one position per call; presumably merged into a preceding segment of the same type -- confirm in align_path.hh
+
+                ++queryIndex;
+                ++refIndex;
+            }
+        }
+        else
+        {
+            apath2.push_back(ps);
+            // non-match segments only move the cursors; no end-of-sequence check is made here
+            if (is_segment_type_read_length(ps.type)) std::advance(queryIndex,ps.length);
+            if (is_segment_type_ref_length(ps.type)) std::advance(refIndex,ps.length);
+        }
+    }
+
+    apath = apath2;
+}
+
+}
diff --git a/src/c++/lib/blt_util/align_path_match_descriptor.cpp b/src/c++/lib/blt_util/align_path_match_descriptor.cpp
new file mode 100644
index 0000000..c95e0b2
--- /dev/null
+++ b/src/c++/lib/blt_util/align_path_match_descriptor.cpp
@@ -0,0 +1,420 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#include "blt_util/align_path_match_descriptor.hh"
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "blt_util/parse_util.hh"
+#include "blt_util/seq_util.hh"
+
+#include "boost/lexical_cast.hpp"
+
+#include <cassert>
+
+#include <sstream>
+
+
+enum
+{
+    INDEL_BEGIN='^', // opens an indel section of the match descriptor string
+    INDEL_END='$' // closes an indel section of the match descriptor string
+};
+
+
+
+static
+void
+unknown_md_error(const char* const md,
+                 const char* const mdptr)
+{
+    // always throws blt_exception: reports the full string md, the offending character *mdptr and its 1-based position
+    std::ostringstream oss;
+    oss << "ERROR: can't parse match descriptor string: " << md << "\n"
+        << "\tunexpected character: '" << *mdptr << "' at position: " << (mdptr-md+1) << "\n";
+    throw blt_exception(oss.str().c_str());
+}
+
+
+
+namespace ALIGNPATH
+{
+
+
+static
+void
+apath_push(path_t& apath,
+           path_segment& ps,
+           const align_t t)
+{
+    // flush the pending segment ps onto apath when the upcoming segment type t differs from it
+    if ( (0==ps.length) || (ps.type==t) ) return; // nothing pending, or the upcoming segment has the same type
+    apath.push_back(ps);
+    ps.clear(); // presumably resets ps to an empty segment -- confirm in align_path.hh
+}
+
+
+
+static
+void
+export_md_to_apath_impl(const char* md,
+                        path_t& apath)
+{
+    // parse the match descriptor string md left to right and append the resulting segments to apath (apath is not cleared here)
+    using illumina::blt_util::parse_unsigned;
+
+    const char* mdptr(md);
+    path_segment ps; // segment currently being accumulated; flushed to apath by apath_push()
+
+    while (*mdptr)
+    {
+        if       (isdigit(*mdptr)) // NOTE(review): *mdptr is a plain char; isdigit() expects an unsigned char value or EOF
+        {
+            apath_push(apath,ps,MATCH);
+            const unsigned mlen(parse_unsigned(mdptr)); // presumably advances mdptr past the digits -- confirm in parse_util.hh
+            ps.length += mlen;
+            ps.type = MATCH;
+
+        }
+        else if (is_valid_base(*mdptr)) // a base outside an indel section is a mismatch: one more position of the MATCH segment
+        {
+            apath_push(apath,ps,MATCH);
+            mdptr++;
+            ps.length++;
+            ps.type = MATCH;
+
+        }
+        else if (*mdptr == INDEL_BEGIN)
+        {
+            mdptr++; // eat INDEL_BEGIN
+
+            while (*mdptr != INDEL_END) // a string ending before INDEL_END reaches unknown_md_error() below, assuming '\0' is neither a digit nor a valid base
+            {
+                if       (isdigit(*mdptr)) // a number inside an indel section is an insertion length
+                {
+                    apath_push(apath,ps,INSERT);
+                    const unsigned mlen(parse_unsigned(mdptr));
+                    ps.length=mlen;
+                    ps.type=INSERT;
+
+                }
+                else if (is_valid_base(*mdptr)) // each base inside an indel section is one deleted reference position
+                {
+                    apath_push(apath,ps,DELETE);
+                    mdptr++;
+                    ps.length++;
+                    ps.type=DELETE;
+
+                }
+                else
+                {
+                    unknown_md_error(md,mdptr);
+                }
+            }
+
+            mdptr++; // eat INDEL_END
+
+        }
+        else
+        {
+            unknown_md_error(md,mdptr);
+        }
+    }
+
+    apath_push(apath,ps,NONE); // flush the last pending segment, if any
+}
+
+
+
+void
+export_md_to_apath(const char* md,
+                   const bool is_fwd_strand,
+                   path_t& apath,
+                   const bool is_edge_deletion_error)
+{
+    // Convert the match descriptor string md into apath, replacing any previous content of apath.
+    // To make best use of previous code, we parse the MD in the
+    // alignment direction and then orient apath to the forward strand
+    // as a second step if required. A DELETE as the first or last segment is either
+    // fatal (is_edge_deletion_error) or logged as a warning and trimmed off.
+    assert(NULL != md);
+
+    apath.clear();
+    export_md_to_apath_impl(md,apath);
+
+    unsigned as(apath.size());
+
+    if ( ((as>0) and (apath.front().type == DELETE)) or
+         ((as>1) and (apath.back().type == DELETE)) ) // with a single segment, front() and back() are the same element
+    {
+        std::ostringstream oss;
+        if (is_edge_deletion_error)
+        {
+            oss << "ERROR: ";
+        }
+        else
+        {
+            oss << "WARNING: ";
+        }
+        oss << "alignment path: " << apath_to_cigar(apath) << " contains meaningless edge deletion.\n";
+        if (is_edge_deletion_error)
+        {
+            throw blt_exception(oss.str().c_str());
+        }
+        else
+        {
+            log_os << oss.str();
+            path_t apath2; // copy of apath without a DELETE in the first and/or last position
+            for (unsigned i(0); i<as; ++i)
+            {
+                if (((i==0) or ((i+1)==as)) and
+                    apath[i].type == DELETE) continue;
+                apath2.push_back(apath[i]);
+            }
+            apath=apath2;
+            as=apath.size();
+        }
+    }
+
+    if ( (not is_fwd_strand) and (as>1) )
+    {
+        std::reverse(apath.begin(),apath.end()); // NOTE(review): std::reverse is declared in <algorithm>, which this file does not include directly
+    }
+}
+
+
+
+static
+void
+fwd_apath_to_export_md(path_t& apath,
+                       const char* ref_begin,
+                       const char* ref_bases,
+                       const char* ref_end,
+                       const char* read_bases,
+                       std::string& md)
+{
+    // append the MD encoding of apath to md, walking both ref_bases and read_bases forwards; apath itself is only read
+    // process the align path
+    bool foundUnsupportedCigar = false;
+    path_t::const_iterator pCIter;
+    for (pCIter = apath.begin(); pCIter != apath.end(); ++pCIter)
+    {
+
+        if (pCIter->type == DELETE)
+        {
+
+            // handle deletion: '^' + the deleted reference bases + '$' (no ref_begin/ref_end check is applied in this loop)
+            md.push_back('^');
+            for (uint32_t i = 0; i < pCIter->length; ++i, ++ref_bases)
+            {
+                md.push_back(*ref_bases);
+            }
+            md.push_back('$');
+
+        }
+        else if (pCIter->type == INSERT)
+        {
+
+            // handle insertion: '^' + the insertion length + '$'; the inserted read bases are skipped
+            md.push_back('^');
+            md += boost::lexical_cast<std::string>(pCIter->length);
+            read_bases += pCIter->length;
+            md.push_back('$');
+
+        }
+        else if (is_segment_align_match(pCIter->type))
+        {
+
+            // handle match/mismatch
+            uint32_t numMatchingBases = 0;
+            for (uint32_t i = 0; i < pCIter->length; ++i, ++ref_bases, ++read_bases)
+            {
+
+                // handle circular genome: positions outside [ref_begin,ref_end] are written as 'N'
+                // NOTE(review): a pending numMatchingBases is not written before the 'N', and ref_end is treated as inclusive -- confirm both are intended
+                if ((ref_bases < ref_begin) || (ref_bases > ref_end))
+                {
+                    md.push_back('N');
+                    continue;
+                }
+
+                if (*ref_bases != *read_bases)
+                {
+
+                    // write the number of preceding matching bases
+                    if (numMatchingBases != 0)
+                    {
+                        md += boost::lexical_cast<std::string>(numMatchingBases);
+                        numMatchingBases = 0;
+                    }
+
+                    // output the mismatched base (the reference base, not the read base)
+                    md.push_back(*ref_bases);
+
+                }
+                else ++numMatchingBases;
+            }
+
+            // write the number of trailing matching bases
+            if (numMatchingBases != 0)
+            {
+                md += boost::lexical_cast<std::string>(numMatchingBases);
+            }
+
+        }
+        else
+        {
+
+            // handle unsupported CIGAR operation: stop here, md is overwritten below
+            foundUnsupportedCigar = true;
+            break;
+        }
+    }
+
+    if (foundUnsupportedCigar) md = "UNSUPPORTED"; // replaces everything in md, including any content it held on entry
+}
+
+
+
+static
+void
+rev_apath_to_export_md(path_t& apath,
+                       const char* ref_begin,
+                       const char* ref_bases,
+                       const char* ref_end,
+                       const char* read_bases,
+                       std::string& md)
+{
+    // append the MD encoding of apath to md: segments are visited last to first, ref_bases is walked backwards and complemented, read_bases forwards
+    // process the align path
+    bool foundUnsupportedCigar = false;
+    path_t::const_reverse_iterator pCRIter;
+    for (pCRIter = apath.rbegin(); pCRIter != apath.rend(); ++pCRIter)
+    {
+        if (pCRIter->type == DELETE)
+        {
+            // handle deletion: '^' + complemented deleted reference bases + '$' (no ref_begin/ref_end check is applied in this loop)
+            md.push_back('^');
+            for (uint32_t i = 0; i < pCRIter->length; ++i, --ref_bases)
+            {
+                md.push_back(comp_base(*ref_bases));
+            }
+            md.push_back('$');
+
+        }
+        else if (pCRIter->type == INSERT)
+        {
+            // handle insertion: '^' + the insertion length + '$'; the inserted read bases are skipped
+            md.push_back('^');
+            md += boost::lexical_cast<std::string>(pCRIter->length);
+            read_bases += pCRIter->length;
+            md.push_back('$');
+
+        }
+        else if (is_segment_align_match(pCRIter->type))
+        {
+            // recreate the match descriptor for this non-INDEL region
+            uint32_t numMatchingBases = 0;
+            for (uint32_t i = 0; i < pCRIter->length; ++i, --ref_bases, ++read_bases)
+            {
+                // handle circular genome: positions outside [ref_begin,ref_end] are written as 'N' (NOTE(review): a pending numMatchingBases is not written first)
+                if ((ref_bases < ref_begin) || (ref_bases > ref_end))
+                {
+                    md.push_back('N');
+                    continue;
+                }
+
+                const char rcRefBase = comp_base(*ref_bases);
+
+                if (rcRefBase != *read_bases)
+                {
+
+                    // write the number of preceding matching bases
+                    if (numMatchingBases != 0)
+                    {
+                        md += boost::lexical_cast<std::string>(numMatchingBases);
+                        numMatchingBases = 0;
+                    }
+
+                    // output the mismatched base (the complemented reference base, not the read base)
+                    md.push_back(rcRefBase);
+
+                }
+                else ++numMatchingBases;
+            }
+
+            // write the number of trailing matching bases
+            if (numMatchingBases != 0)
+            {
+                md += boost::lexical_cast<std::string>(numMatchingBases);
+            }
+        }
+        else
+        {
+            // handle unsupported CIGAR operation: stop here, md is overwritten below
+            foundUnsupportedCigar = true;
+            break;
+        }
+    }
+
+    if (foundUnsupportedCigar) md = "UNSUPPORTED"; // replaces everything in md, including any content it held on entry
+}
+
+/// write the export MD string for apath into md (md is cleared first); ref_seq + ref_pos - 1 is taken as the first aligned
+/// reference base, i.e. ref_pos is used as a 1-based offset. reverse-strand reads walk the reference backwards from the last aligned base.
+void
+apath_to_export_md(path_t& apath,
+                   const char* ref_seq,
+                   const char* ref_end,
+                   const int32_t ref_pos,
+                   const std::string& read_bases,
+                   const bool is_fwd_strand,
+                   std::string& md)
+{
+    md.clear();
+
+    if (is_fwd_strand)
+    {
+        const char* pRead      = read_bases.c_str();
+        const char* pReference = ref_seq + ref_pos - 1;
+        fwd_apath_to_export_md(apath, ref_seq, pReference, ref_end, pRead, md);
+
+    }
+    else
+    {
+        uint32_t numRefBases = 0; // reference span of the path: match and DELETE segments only
+        for (const auto& ps : apath)
+        {
+            if (is_segment_align_match(ps.type) || (ps.type == DELETE))
+            {
+                numRefBases += ps.length;
+            }
+        }
+
+        const char* pRead      = read_bases.c_str();
+        const char* pReference = ref_seq + ref_pos + numRefBases - 2; // == (ref_seq + ref_pos - 1) + (numRefBases - 1): last reference base of that span
+        rev_apath_to_export_md(apath, ref_seq, pReference, ref_end, pRead, md);
+    }
+}
+
+}
diff --git a/src/c++/lib/blt_util/align_path_match_descriptor.hh b/src/c++/lib/blt_util/align_path_match_descriptor.hh
new file mode 100644
index 0000000..775bcdc
--- /dev/null
+++ b/src/c++/lib/blt_util/align_path_match_descriptor.hh
@@ -0,0 +1,53 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+/// \brief functions to convert alignments to/from the match descriptor (MD) format
+///
+/// Note MD is an older format which should be considered deprecated.
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+
+
+
+namespace ALIGNPATH
+{
+
+void
+export_md_to_apath(const char* md,
+                   const bool is_fwd_strand,
+                   path_t& apath,
+                   const bool is_edge_deletion_error=true);
+
+void
+apath_to_export_md(path_t& apath,
+                   const char* ref_seq,
+                   const char* ref_end,
+                   const int32_t ref_pos,
+                   const std::string& read_bases,
+                   const bool is_fwd_strand,
+                   std::string& md);
+
+}
diff --git a/src/c++/lib/blt_util/align_path_util.hh b/src/c++/lib/blt_util/align_path_util.hh
new file mode 100644
index 0000000..de496da
--- /dev/null
+++ b/src/c++/lib/blt_util/align_path_util.hh
@@ -0,0 +1,108 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+#include "blt_util/blt_types.hh"
+
+
+#include <cassert>
+
+
+namespace ALIGNPATH
+{
+/// advance read_offset and ref_offset past the segment path[path_index], then increment path_index
+inline
+void
+increment_path(const path_t& path,
+               unsigned& path_index,
+               unsigned& read_offset,
+               pos_t& ref_offset)
+{
+    const path_segment& ps(path[path_index]); // path_index is not range-checked here
+
+    if       (is_segment_align_match(ps.type))
+    {
+        read_offset += ps.length;
+        ref_offset += ps.length;
+    }
+    else if ((ps.type == DELETE) || (ps.type == SKIP))
+    {
+        ref_offset += ps.length;
+    }
+    else if ((ps.type == INSERT) || (ps.type == SOFT_CLIP))
+    {
+        read_offset += ps.length;
+    }
+    else if ((ps.type == HARD_CLIP) || (ps.type == PAD))
+    {
+        // do nothing: neither offset moves for these segment types
+    }
+    else
+    {
+        assert(false && "Unexpected alignment type"); // can't handle other CIGAR types yet
+    }
+
+    path_index++;
+}
+
+
+// Initialize to the segment count, total insert length and total delete
+// length of a swap in the path, i.e. the run of consecutive indel segments
+// starting at path_index. Assumes path_index points to the beginning of
+// a swap.
+struct swap_info
+{
+    swap_info(const path_t& path,
+              const unsigned path_index)
+        : n_seg(path_index) // used as the scan cursor first; converted to a segment count below
+        , insert_length(0)
+        , delete_length(0)
+    {
+        const unsigned aps(path.size());
+        for (; (n_seg<aps) && is_segment_type_indel(path[n_seg].type); ++n_seg)
+        {
+            const path_segment& ps(path[n_seg]);
+            if     (ps.type==INSERT)
+            {
+                insert_length += ps.length;
+            }
+            else if (ps.type==DELETE)
+            {
+                delete_length += ps.length;
+            }
+            else
+            {
+                assert(false && "Unexpected alignment type"); // reached only if is_segment_type_indel() accepts a type other than INSERT/DELETE
+            }
+        }
+        n_seg -= path_index; // cursor position -> number of segments scanned
+    }
+
+    unsigned n_seg; // number of consecutive indel segments starting at path_index
+    unsigned insert_length; // summed length of the INSERT segments in that run
+    unsigned delete_length; // summed length of the DELETE segments in that run
+};
+}
diff --git a/src/c++/lib/blt_util/basic_matrix.hh b/src/c++/lib/blt_util/basic_matrix.hh
new file mode 100644
index 0000000..956a5ad
--- /dev/null
+++ b/src/c++/lib/blt_util/basic_matrix.hh
@@ -0,0 +1,105 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+#include <vector>
+
+
+/// very simple matrix implementation, row major: element (row,col) is stored at _data[row*_colCount+col]
+template <typename T>
+struct basic_matrix
+{
+    typedef typename std::vector<T> data_t;
+    typedef typename data_t::iterator iterator;
+    typedef typename data_t::const_iterator const_iterator;
+
+    basic_matrix(
+        const unsigned rowCount = 0,
+        const unsigned colCount = 0) :
+        _colCount(colCount),
+        _data(rowCount* colCount) // rowCount*colCount value-initialized elements
+    {}
+
+    void
+    resize(
+        const unsigned rowCount,
+        const unsigned colCount)
+    {
+        _colCount=colCount;
+        _data.resize(rowCount*colCount); // flat resize: surviving values keep their linear index, not their (row,col) position
+    }
+
+    T&
+    val(const unsigned row,
+        const unsigned col)
+    {
+        return _data[(row*_colCount+col)]; // no bounds checking
+    }
+
+    const T&
+    val(const unsigned row,
+        const unsigned col) const
+    {
+        return _data[(row*_colCount+col)]; // no bounds checking
+    }
+
+    bool
+    empty() // NOTE(review): not const-qualified, so not callable on a const basic_matrix
+    {
+        return _data.empty();
+    }
+
+    size_t
+    size() // total element count; NOTE(review): not const-qualified either
+    {
+        return _data.size();
+    }
+
+    iterator
+    begin() // begin()/end() iterate over the underlying row-major storage
+    {
+        return _data.begin();
+    }
+
+    const_iterator
+    begin() const
+    {
+        return _data.begin();
+    }
+
+    iterator
+    end()
+    {
+        return _data.end();
+    }
+
+    const_iterator
+    end() const
+    {
+        return _data.end();
+    }
+
+private:
+    unsigned _colCount; // number of columns, i.e. the stride between consecutive rows
+    std::vector<T> _data; // row-major element storage
+};
+
+
diff --git a/src/c++/lib/blt_util/binomial_test.cpp b/src/c++/lib/blt_util/binomial_test.cpp
new file mode 100644
index 0000000..5dc9065
--- /dev/null
+++ b/src/c++/lib/blt_util/binomial_test.cpp
@@ -0,0 +1,194 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+/// \author Mitch Bekritsky
+///
+
+#include "blt_util/binomial_test.hh"
+#include "blt_util/stat_util.hh"
+
+#include <boost/math/distributions/binomial.hpp>
+#include <boost/math/distributions/complement.hpp>
+
+using boost::math::binomial;
+using boost::math::cdf;
+using boost::math::complement;
+
+#include <algorithm>
+
+/// two-sided exact binomial p-value for observing n_success successes in n_trials trials under success rate p;
+/// requires 0 < p < 1 and n_success <= n_trials (asserted), and returns 1 when n_trials is 0
+double
+get_binomial_twosided_exact_pval(
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials)
+{
+    assert((p>0.) && (p<1.)); // NOTE(review): assert/fabs/DBL_EPSILON come from <cassert>/<cmath>/<cfloat>, none of which this file includes directly
+    assert(n_success <= n_trials);
+
+    // otherwise we get p == 2
+    if (n_trials == 0)
+    {
+        return 1;
+    }
+
+    if (fabs(p - 0.5) < DBL_EPSILON) // symmetric case: double the one-sided tail on the observed side
+    {
+        const unsigned n_failure(n_trials-n_success);
+        const double obs_p((double)n_success/(double)n_trials);
+
+        double exact_prob;
+        if (obs_p <= p)
+        {
+            exact_prob=cdf(binomial(n_trials,p),n_success); // P(X <= n_success)
+        }
+        else
+        {
+            exact_prob=cdf(binomial(n_trials,1.-p),n_failure); // same tail, expressed in failures
+        }
+
+        return std::min(1.0, 2.*exact_prob);
+    }
+    else
+    {
+        // naive implementation -- this can be improved
+        // in two ways:
+        // * find upper / lower bound so we don't have to
+        //   evaluate the pdf for every single value
+        //   between 0 and n_trials
+        // * be smarter about additive error
+        binomial dist = binomial(n_trials, p);
+        double exact_prob = pdf(dist, n_success);
+        double result = 0; // sum of the probabilities of all outcomes no more likely than the observed one
+        for (unsigned j = 0; j <= n_trials; ++j)
+        {
+            double pp = pdf(dist, j);
+            if (pp <= exact_prob) // plain floating-point comparison, no tolerance
+            {
+                result += pp;
+            }
+        }
+        return result;
+    }
+}
+
+
+/// \return true if get_binomial_twosided_exact_pval(p,n_success,n_trials) is strictly less than alpha
+bool
+is_reject_binomial_twosided_exact(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials)
+{
+    return (get_binomial_twosided_exact_pval(p,n_success,n_trials)<alpha);
+}
+
+
+/// two-sided binomial test via a chi-square statistic on the success/failure counts; requires 0 < p < 1 and n_success <= n_trials (asserted)
+bool
+is_reject_binomial_twosided_chi_sqr(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials)
+{
+    assert((p>0.) && (p<1.));
+    assert(n_success <= n_trials);
+
+    const unsigned n_failure(n_trials-n_success);
+    const double e_success(p*n_trials); // expected counts under B(n_trials,p)
+    const double e_failure(((double)n_trials)-e_success);
+
+    const double d_success(n_success-e_success); // observed minus expected
+    const double d_failure(n_failure-e_failure);
+
+    const double xsq((d_success*d_success)/e_success+(d_failure*d_failure)/e_failure); // NOTE(review): n_trials == 0 makes both terms 0/0
+
+    return is_chi_sqr_reject(xsq,1,alpha); // second argument is presumably the degrees of freedom -- confirm in stat_util.hh
+}
+
+
+/// two-sided binomial test: chi-square approximation when n_trials exceeds exact_test_threshold, exact test otherwise
+bool
+is_reject_binomial_twosided(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials)
+{
+    static const unsigned exact_test_threshold(250); // largest n_trials still handled by the exact test
+
+    if (n_trials > exact_test_threshold)
+    {
+        return is_reject_binomial_twosided_chi_sqr(alpha,p,n_success,n_trials);
+    }
+    else
+    {
+        return is_reject_binomial_twosided_exact(alpha,p,n_success,n_trials);
+    }
+}
+
+
+/// \return the probability of n_success or more successes under B(n_trials,p); requires 0 <= p <= 1 and n_success <= n_trials (asserted)
+double
+get_binomial_gte_n_success_exact_pval(
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials)
+{
+    //although binomial probabilities of
+    // 0 or 1 are possible, they don't have much meaning
+    assert((p >= 0.) && (p <= 1.));
+    assert(n_success <= n_trials);
+    if (n_success==0) return 1; // P(X >= 0) is 1; also keeps the unsigned n_success - 1 below from wrapping around
+
+    return cdf(complement(binomial(n_trials, p), n_success - 1)); // P(X > n_success-1) == P(X >= n_success)
+}
+
+
+/// \return true if the probability of n_success or more successes under B(n_trials,p) is at most alpha; requires alpha >= 0 (asserted)
+bool
+is_reject_binomial_gte_n_success_exact(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials)
+{
+    assert(alpha >= 0);
+
+    const double observed_pval = get_binomial_gte_n_success_exact_pval(p, n_success, n_trials);
+
+    return (observed_pval <= alpha); // inclusive comparison, unlike the strict '<' in is_reject_binomial_twosided_exact()
+}
+/// \return 1 + quantile(complement(B(n_trials,p), alpha)), as a double; requires alpha >= 0 and 0 <= p <= 1 (asserted)
+double
+min_count_binomial_gte_exact(
+    const double alpha,
+    const double p,
+    const unsigned n_trials)
+{
+    assert(alpha >= 0);
+    assert((p >= 0.) && (p <= 1.));
+
+    return (1 + quantile(complement(binomial(n_trials, p), alpha)));
+}
diff --git a/src/c++/lib/blt_util/binomial_test.hh b/src/c++/lib/blt_util/binomial_test.hh
new file mode 100644
index 0000000..4d907ce
--- /dev/null
+++ b/src/c++/lib/blt_util/binomial_test.hh
@@ -0,0 +1,105 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+/// \author Mitch Bekritsky
+///
+
+#pragma once
+
+
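+/// \brief two-sided binomial exact probability
+///
+/// For p == 0.5 this finds the one-tailed probability of n_success or a
+/// more extreme number of successes and doubles it (capped at 1). For any
+/// other p it sums the probabilities of all outcomes that are no more
+/// likely than the observed n_success.
+///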
+double
+get_binomial_twosided_exact_pval(
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials);
+
+/// \brief two-sided binomial exact test
+///
+bool
+is_reject_binomial_twosided_exact(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials);
+
+/// \brief two-sided binomial test chi-sqr approximation
+bool
+is_reject_binomial_twosided_chi_sqr(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials);
+
+/// \brief two-sided binomial test
+///
+/// Find the probability of n_success or more extreme success under B(n_trial,p)
+///
+/// This function chooses from the two testing methods above (exact/approx) based on trial size
+///
+bool
+is_reject_binomial_twosided(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials);
+
+
+/// \brief one-sided binomial exact probability
+///
+/// probability of n_success or more given B(n_trials,p)
+///
+/// matches R code: pbinom((n_success-1),n_trials,p,lower.tail=FALSE)
+double
+get_binomial_gte_n_success_exact_pval(
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials);
+
+
+/// \brief one-sided binomial exact test
+///
+/// tests whether n_success or greater can be rejected under
+/// a null hypothesis of B(n_trials,p)
+///
+/// matches R code: binom.test(n_success, n_trials, p, "greater")$p.value <= alpha
+bool
+is_reject_binomial_gte_n_success_exact(
+    const double alpha,
+    const double p,
+    const unsigned n_success,
+    const unsigned n_trials);
+
+/// returns the minimum number of successes to reject the null hypothesis
+/// with a p-value of at most alpha for a given error rate and number of trials
+///
+/// matches R code 1 + qbinom(alpha, n_trials, p, lower.tail = FALSE)
+double
+min_count_binomial_gte_exact(
+    const double alpha,
+    const double p,
+    const unsigned n_trials);
diff --git a/src/c++/lib/blt_util/blt_exception.cpp b/src/c++/lib/blt_util/blt_exception.cpp
new file mode 100644
index 0000000..80fba71
--- /dev/null
+++ b/src/c++/lib/blt_util/blt_exception.cpp
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+
+/// \author Chris Saunders
+///
+#include "blt_util/blt_exception.hh"
+
+#ifdef KILL_EXCEPTIONS
+#include "blt_util/log.hh"
+
+#include <cstdlib>
+
+#include <iostream>
+#endif
+
+
+/// store s as the exception message; when KILL_EXCEPTIONS is defined, log the message to log_os and abort() instead of returning
+blt_exception::
+blt_exception(const char* s)
+    : message(s)
+{
+#ifdef KILL_EXCEPTIONS
+    log_os << "ERROR:: " << s << std::endl;
+    abort();
+#endif
+}
+
diff --git a/src/c++/lib/blt_util/blt_exception.hh b/src/c++/lib/blt_util/blt_exception.hh
new file mode 100644
index 0000000..b3cdacf
--- /dev/null
+++ b/src/c++/lib/blt_util/blt_exception.hh
@@ -0,0 +1,44 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/compat_util.hh"
+
+#include <exception>
+#include <string>
+
+/// \brief a minimal exception class; the constructor stores its C-string argument in \c message
+struct blt_exception : public std::exception
+{
+    explicit
+    blt_exception(const char* s);
+    /// \return the stored message; the pointer is owned by \c message
+    const char* what() const noexcept override
+    {
+        return message.c_str();
+    }
+    /// text reported by what()
+    std::string message;
+};
diff --git a/src/c++/lib/blt_util/blt_types.hh b/src/c++/lib/blt_util/blt_types.hh
new file mode 100644
index 0000000..a4b6d06
--- /dev/null
+++ b/src/c++/lib/blt_util/blt_types.hh
@@ -0,0 +1,32 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cstdint>
+
+
+typedef int32_t pos_t;
+
+typedef float blt_float_t;
diff --git a/src/c++/lib/blt_util/chrom_depth_map.cpp b/src/c++/lib/blt_util/chrom_depth_map.cpp
new file mode 100644
index 0000000..5248418
--- /dev/null
+++ b/src/c++/lib/blt_util/chrom_depth_map.cpp
@@ -0,0 +1,102 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+///
+/// \author Chris Saunders
+///
+
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/chrom_depth_map.hh"
+#include "blt_util/log.hh"
+#include "blt_util/parse_util.hh"
+
+#include <cstdlib>
+#include <cstring>
+
+#include <fstream>
+#include <iostream>
+
+
+
+/// Parse a tab-delimited chromosome depth file into \p chrom_depth.
+///
+/// Each line must be "<chrom>\t<depth>". An empty \p chrom_depth_file is a
+/// no-op. Open/read/format failures and negative depths are logged to log_os
+/// and terminate the process; an unparsable depth value is logged and the
+/// blt_exception raised while parsing it is rethrown.
+void
+parse_chrom_depth(const std::string& chrom_depth_file,
+                  cdmap_t& chrom_depth)
+{
+    if (chrom_depth_file.empty()) return;
+
+    std::ifstream depth_is(chrom_depth_file.c_str());
+    if (! depth_is)
+    {
+        log_os << "ERROR: Failed to open chrom depth file '" << chrom_depth_file << "'\n";
+        exit(EXIT_FAILURE);
+    }
+
+    // std::string line buffer: no fixed upper bound on line length
+    std::string line;
+    unsigned line_no(0);
+
+    while (true)
+    {
+        if (! std::getline(depth_is,line))
+        {
+            if (depth_is.eof()) break;
+            log_os << "ERROR: unexpected failure while attempting to read chrom depth file line " << (line_no+1) << "\n";
+            exit(EXIT_FAILURE);
+        }
+        ++line_no;
+
+        const std::string::size_type tabPos(line.find('\t'));
+        if (std::string::npos == tabPos)
+        {
+            log_os << "ERROR: unexpected format in read chrom depth file line " << (line_no) << "\n";
+            exit(EXIT_FAILURE);
+        }
+        const std::string chrom(line.substr(0,tabPos));
+
+        // parse before touching the map so a failed parse leaves no partial entry
+        double depth(0);
+        try
+        {
+            const char* s(line.c_str()+tabPos+1);
+            depth = illumina::blt_util::parse_double(s);
+        }
+        catch (const blt_exception&)
+        {
+            log_os << "ERROR: unexpected format in read chrom depth file line " << (line_no) << "\n";
+            throw;
+        }
+        if (depth < 0)
+        {
+            log_os << "ERROR: Chromosome depth estimate is negative. Chromosome: '" << chrom
+                   << "' Depth: " << depth << "\n";
+            exit(EXIT_FAILURE);
+        }
+        chrom_depth[chrom] = depth;
+    }
+}
+
diff --git a/src/c++/lib/blt_util/chrom_depth_map.hh b/src/c++/lib/blt_util/chrom_depth_map.hh
new file mode 100644
index 0000000..fd2caaa
--- /dev/null
+++ b/src/c++/lib/blt_util/chrom_depth_map.hh
@@ -0,0 +1,37 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <map>
+#include <string>
+
+
+typedef std::map<std::string,double> cdmap_t;
+
+
+// parse the chrom depth file
+void
+parse_chrom_depth(const std::string& chrom_depth_file,
+                  cdmap_t& chrom_depth);
diff --git a/src/c++/lib/blt_util/compat_unistd.h b/src/c++/lib/blt_util/compat_unistd.h
new file mode 100644
index 0000000..2799378
--- /dev/null
+++ b/src/c++/lib/blt_util/compat_unistd.h
@@ -0,0 +1,52 @@
+#ifndef _UNISTD_H
+#define _UNISTD_H    1
+
+/* This file is intended to serve as a drop-in replacement for
+ *  unistd.h on Windows
+ *  Please add functionality as needed
+ */
+
+#include <stdlib.h>
+#include <io.h>
+/*#include <getopt.h>*/ /* getopt at: https://gist.github.com/ashelly/7776712 */
+#include <process.h> /* for getpid() and the exec..() family */
+#include <direct.h> /* for _getcwd() and _chdir() */
+
+#define srandom srand
+#define random rand
+
+/* Values for the second argument to access.
+   These may be OR'd together.  */
+#define R_OK    4       /* Test for read permission.  */
+#define W_OK    2       /* Test for write permission.  */
+//#define   X_OK    1       /* execute permission - unsupported in windows*/
+#define F_OK    0       /* Test for existence.  */
+
+//#define access _access
+#define dup2 _dup2
+#define execve _execve
+#define ftruncate _chsize
+#define unlink _unlink
+#define fileno _fileno
+#define getcwd _getcwd
+#define chdir _chdir
+#define isatty _isatty
+#define lseek _lseek
+/* read, write, and close are NOT being #defined here, because while there are file handle specific versions for Windows, they probably don't work for sockets. You need to look at your app and consider whether to call e.g. closesocket(). */
+/* NOTE(review): int is 32 bits wide on Windows, including 64-bit builds - confirm no caller needs a wider ssize_t */
+#define ssize_t int
+
+#define STDIN_FILENO 0
+#define STDOUT_FILENO 1
+#define STDERR_FILENO 2
+/* should be in some equivalent to <sys/types.h> */
+/*typedef __int8            int8_t;*/
+typedef __int16           int16_t;
+typedef __int32           int32_t;
+typedef __int64           int64_t;
+typedef unsigned __int8   uint8_t;
+typedef unsigned __int16  uint16_t;
+typedef unsigned __int32  uint32_t;
+typedef unsigned __int64  uint64_t;
+
+#endif /* unistd.h  */
\ No newline at end of file
diff --git a/src/c++/lib/blt_util/compat_util.cpp b/src/c++/lib/blt_util/compat_util.cpp
new file mode 100644
index 0000000..50c6202
--- /dev/null
+++ b/src/c++/lib/blt_util/compat_util.cpp
@@ -0,0 +1,84 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "compat_util.hh"
+
+#include <cerrno>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+
+#include <iostream>
+
+
+#ifdef _MSC_VER
+#include "compat_util_win32_realpath.c"
+#endif
+
+/// Replace \p path with the result of realpath(); returns false and leaves
+/// \p path untouched when realpath() yields NULL or leaves errno non-zero.
+bool
+compat_realpath(std::string& path)
+{
+    errno = 0;
+    char* const resolved(realpath(path.c_str(), NULL));
+    const bool isResolved((NULL != resolved) && (0 == errno));
+    if (isResolved)
+    {
+        path = resolved;
+    }
+    // realpath() was handed a NULL output buffer, so any result must be freed here
+    free(resolved);
+    return isResolved;
+}
+
+/// Round \p x to the nearest integer, halfway cases away from zero (as C99 round()).
+/// The fractional part is compared with 0.5 instead of computing floor(x+0.5),
+/// because that addition itself rounds: 0.49999999999999994+0.5 == 1.0.
+double
+compat_round(const double x)
+{
+    if (x>=0.)
+    {
+        const double lower(std::floor(x));
+        return ((x-lower) >= 0.5) ? (lower+1.) : lower;
+    }
+    const double upper(std::ceil(x)); // also reached for NaN, which is returned unchanged
+    return ((upper-x) >= 0.5) ? (upper-1.) : upper;
+}
+
+/// Return a pointer to the part of \p str after its final path separator
+/// ('\\' when built with MSVC, '/' otherwise), or \p str itself if it has none.
+const char*
+compat_basename(const char* str)
+{
+#ifdef _MSC_VER
+    const char separator('\\');
+#else
+    const char separator('/');
+#endif
+
+    const char* const lastSeparator(strrchr(str,separator));
+    return (NULL != lastSeparator) ? (lastSeparator+1) : str;
+}
diff --git a/src/c++/lib/blt_util/compat_util.hh b/src/c++/lib/blt_util/compat_util.hh
new file mode 100644
index 0000000..2977619
--- /dev/null
+++ b/src/c++/lib/blt_util/compat_util.hh
@@ -0,0 +1,54 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+// take care of some (mostly C99) functions not available in VS C++
+//
+
+#pragma once
+
+#include <string>
+
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#define strdup _strdup
+#endif
+
+#if ((defined(_MSC_VER)) && (_MSC_VER <= 1800))
+#undef noexcept
+#define noexcept
+#endif
+
+double
+compat_round(const double x);
+
+
+const char*
+compat_basename(const char* s);
+
+
+// gets canonical name of paths, but only when these refer to existing items
+// returns false on error.
+bool
+compat_realpath(std::string& path);
diff --git a/src/c++/lib/blt_util/compat_util_win32_realpath.c b/src/c++/lib/blt_util/compat_util_win32_realpath.c
new file mode 100644
index 0000000..2d25f25
--- /dev/null
+++ b/src/c++/lib/blt_util/compat_util_win32_realpath.c
@@ -0,0 +1,95 @@
+/* realpath.c
+ * $Id$
+ *
+ * Provides an implementation of the "realpath" function, conforming
+ * approximately to SUSv3, and adapted for use on native Microsoft(R)
+ * Win32 platforms.
+ *
+ * Written by Keith Marshall <keithmarshall at users.sourceforge.net>
+ *
+ * This is free software.  You may redistribute and/or modify it as you
+ * see fit, without restriction of copyright.
+ *
+ * This software is provided "as is", in the hope that it may be useful,
+ * but WITHOUT WARRANTY OF ANY KIND, not even any implied warranty of
+ * MERCHANTABILITY, nor of FITNESS FOR ANY PARTICULAR PURPOSE.  At no
+ * time will the author accept any form of liability for any damages,
+ * however caused, resulting from the use of this software.
+ *
+ */
+
+#ifdef _WIN32
+#include <io.h>
+#include <stdlib.h>
+#include <errno.h>
+
+char* __cdecl
+realpath( const char * name, char * resolved )
+{
+  char *retname = NULL;  /* we will return this, if we fail */
+
+  /* SUSv3 says we must set `errno = EINVAL', and return NULL,
+   * if `name' is passed as a NULL pointer.
+   */
+
+  if( name == NULL )
+    errno = EINVAL;
+
+  /* Otherwise, `name' must refer to a readable filesystem object,
+   * if we are going to resolve its absolute path name.
+   */
+
+  else if( _access( name, 4 ) == 0 )
+  {
+    /* If `name' didn't point to an existing entity, we don't get here;
+     * we fall past this block, returning NULL, with `errno' set by
+     * `_access'.  Otherwise `_fullpath' can resolve `name', but first
+     * check that we have a suitable buffer, in which to return it.
+     */
+
+    if( (retname = resolved) == NULL )
+    {
+      /* Caller didn't give us a buffer, so we'll exercise the
+       * option granted by SUSv3, and allocate one.
+       *
+       * `_fullpath' would do this for us, but it uses `malloc', and
+       * Microsoft's implementation doesn't set `errno' on failure;
+       * allocating here lets us report `errno = ENOMEM' for that case.
+       */
+
+      retname =(char*) malloc( _MAX_PATH );
+    }
+
+    /* By now, we should have a valid buffer.
+     * If we don't, then we know that `malloc' failed,
+     * so we can set `errno = ENOMEM' appropriately.
+     */
+
+    if( retname == NULL )
+      errno = ENOMEM;
+
+    /* Otherwise, when we do have a valid buffer,
+     * `_fullpath' should only fail if the path name is too long.
+     */
+
+    else if( _fullpath( retname, name, _MAX_PATH ) == NULL )
+    {
+      /* free only a buffer we malloc'd; `resolved' belongs to the caller */
+      if( resolved == NULL )
+        free( retname );
+      retname = NULL;
+      errno = ENAMETOOLONG;
+    }
+  }
+
+  /* By the time we get to here,
+   * `retname' either points to the required resolved path name,
+   * or it is NULL, with `errno' set appropriately, either of which
+   * is our required return condition.
+   */
+
+  return retname;
+}
+
+/* $RCSfile$: end of file */
+#endif
diff --git a/src/c++/lib/blt_util/depth_buffer.hh b/src/c++/lib/blt_util/depth_buffer.hh
new file mode 100644
index 0000000..75caab6
--- /dev/null
+++ b/src/c++/lib/blt_util/depth_buffer.hh
@@ -0,0 +1,168 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+#include "blt_util/RangeMap.hh"
+
+#include <algorithm>
+#include <cassert>
+
+/// shared storage and protected helpers for the depth_buffer types; not meant to be used directly
+struct depth_buffer_base
+{
+    void
+    clear()
+    {
+        _data.clear(); // drop every stored count
+    }
+
+protected:
+    unsigned
+    _val(const pos_t pos) const
+    {
+        return _data.getConstRefDefault(pos,0); // presumably 0 when pos has no entry - verify against RangeMap
+    }
+
+    /// add incVal to the count stored at the single key pos
+    void
+    _inc(const pos_t pos,
+         const unsigned incVal)
+    {
+        _data.getRef(pos) += incVal;
+    }
+
+    void
+    _clear(const pos_t pos)
+    {
+        if (_data.isKeyPresent(pos)) _data.erase(pos); // erase is only attempted for keys that exist
+    }
+
+private:
+    typedef RangeMap<pos_t,unsigned> count_t;
+    count_t _data; // count per pos_t key
+};
+
+
+/// uncompressed map of position to depth
+///
+/// assumes that a narrow list of positions is maintained so that
+/// array based lookup optimizations can be used
+///
+struct depth_buffer : public depth_buffer_base
+{
+    /// depth currently stored for \p pos
+    unsigned
+    val(const pos_t pos) const
+    {
+        return _val(pos);
+    }
+
+    /// add one to the depth stored for \p pos
+    void
+    inc(const pos_t pos)
+    {
+        _inc(pos,1);
+    }
+
+    void
+    clear_pos(const pos_t pos)
+    {
+        _clear(pos);
+    }
+
+    /// return true if any position in the closed range [begin,end] has depth >= \p depth
+    bool
+    is_range_ge_than(const pos_t begin,
+                     const pos_t end,
+                     const unsigned depth) const
+    {
+        assert(begin <= end);
+        pos_t pos(begin);
+        while ((pos <= end) && (_val(pos) < depth)) ++pos;
+        return (pos <= end);
+    }
+};
+
+
+
+/// simple map of position to depth which can "compress" storage by binning
+/// compressionFactor adjacent positions per stored count; val() reports the
+/// rounded mean depth of a bin. Assumes a narrow list of positions is
+/// maintained so that array based lookup optimizations can be used.
+struct depth_buffer_compressible : public depth_buffer_base
+{
+    depth_buffer_compressible(
+        const unsigned compressionFactor=1)
+        : _csize(compressionFactor),
+          _halfcsize(_csize/2)
+    {
+        assert(_csize>=1);
+    }
+
+    unsigned
+    val(const pos_t pos) const
+    {
+        return ((_val(_bin(pos))+_halfcsize)/_csize);
+    }
+
+    /// increment range [pos,pos+posRange) by one
+    void
+    inc(pos_t pos,
+        const unsigned posRange = 1)
+    {
+        assert(posRange>=1);
+        const pos_t endPos(pos+static_cast<pos_t>(posRange));
+        pos_t dataPos(_bin(pos));
+        while (true)
+        {
+            const pos_t blockEndPos(std::min(((dataPos+1)*static_cast<pos_t>(_csize)), endPos));
+            _inc(dataPos,(blockEndPos-pos));
+            if (blockEndPos==endPos) return;
+            pos = blockEndPos;
+            dataPos++;
+        }
+    }
+
+    /// a bin is cleared once its last position is cleared, so with compressionFactor gt 1 calls must be ordered
+    void
+    clear_pos(const pos_t pos)
+    {
+        const pos_t dataPos(_bin(pos));
+        if (pos == (((dataPos+1)*static_cast<pos_t>(_csize))-1)) _clear(dataPos);
+    }
+
+private:
+    /// floor(pos/_csize) in signed arithmetic; a plain pos/_csize converts a negative pos to unsigned
+    pos_t
+    _bin(const pos_t pos) const
+    {
+        const pos_t csize(static_cast<pos_t>(_csize));
+        return ((pos/csize) - (((pos%csize) < 0) ? 1 : 0));
+    }
+    const unsigned _csize;
+    const unsigned _halfcsize;
+};
+
diff --git a/src/c++/lib/blt_util/depth_buffer_util.cpp b/src/c++/lib/blt_util/depth_buffer_util.cpp
new file mode 100644
index 0000000..7d9fee1
--- /dev/null
+++ b/src/c++/lib/blt_util/depth_buffer_util.cpp
@@ -0,0 +1,50 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#include "depth_buffer_util.hh"
+
+/// Walk \p apath from reference position \p pos and call db.inc() once for every
+/// position spanned by a segment for which is_segment_align_match() holds.
+void
+add_alignment_to_depth_buffer(
+    const pos_t& pos,
+    const ALIGNPATH::path_t& apath,
+    depth_buffer& db)
+{
+    using namespace ALIGNPATH;
+    // reference position at the start of the segment being visited
+    pos_t segmentStartPos(pos);
+    for (const path_segment& segment : apath)
+    {
+        if (is_segment_align_match(segment.type))
+        {
+            // one unit of depth per position in [segmentStartPos,segmentEndPos)
+            const pos_t segmentEndPos(segmentStartPos+static_cast<pos_t>(segment.length));
+            for (pos_t p(segmentStartPos); p<segmentEndPos; ++p) db.inc(p);
+        }
+        if (is_segment_type_ref_length(segment.type)) segmentStartPos += segment.length;
+    }
+}
+
diff --git a/src/c++/lib/blt_util/depth_buffer_util.hh b/src/c++/lib/blt_util/depth_buffer_util.hh
new file mode 100644
index 0000000..92acbcf
--- /dev/null
+++ b/src/c++/lib/blt_util/depth_buffer_util.hh
@@ -0,0 +1,37 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+#include "blt_util/depth_buffer.hh"
+
+
+/// parse alignment into depth buffer object:
+///
+void
+add_alignment_to_depth_buffer(
+    const pos_t& pos,
+    const ALIGNPATH::path_t& apath,
+    depth_buffer& db);
diff --git a/src/c++/lib/blt_util/flyweight_observer.hh b/src/c++/lib/blt_util/flyweight_observer.hh
new file mode 100644
index 0000000..92d259a
--- /dev/null
+++ b/src/c++/lib/blt_util/flyweight_observer.hh
@@ -0,0 +1,67 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+///
+/// \brief extremely minimal observer pattern
+///
+/// this class is designed to assist setting up an observer pattern which has zero memory overhead, but
+/// has extremely limited abilities, see blt_util/observer.hh for a more general observer pattern support
+///
+
+#pragma once
+
+template <typename T>
+struct flyweight_notifier;
+
+/// Observer side: its notification hook is private, so only the befriended flyweight_notifier<T> can call it.
+template <typename T>
+struct flyweight_observer
+{
+    friend struct flyweight_notifier<T>; // lets the notifier call the private hook below
+
+    flyweight_observer& operator=(const flyweight_observer&) = default;
+
+    virtual ~flyweight_observer() {}
+
+private:
+    virtual void
+    recieve_flyweight_notification(const T&) = 0; // pure virtual: concrete observers supply the handler
+};
+
+/// Notifier side: derived types pass a message to one observer through notify_flyweight_observer().
+template <typename T>
+struct flyweight_notifier
+{
+    typedef flyweight_observer<T> flyweight_observer_t;
+
+protected:
+    void
+    notify_flyweight_observer(
+        flyweight_observer_t* val,
+        const T& msg) const
+    {
+        val->recieve_flyweight_notification(msg); // val is dereferenced without a null check
+    }
+};
+
diff --git a/src/c++/lib/blt_util/id_map.hh b/src/c++/lib/blt_util/id_map.hh
new file mode 100644
index 0000000..03b8964
--- /dev/null
+++ b/src/c++/lib/blt_util/id_map.hh
@@ -0,0 +1,228 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#pragma once
+
+#include "blt_util/blt_exception.hh"
+
+#include "boost/optional.hpp"
+
+#include <map>
+#include <vector>
+
+
+/// \brief A set-like container which also assigns each distinct key a
+/// sequential id number, starting from 0 in insertion order
+///
+template <typename K, typename COMPARE = std::less<K>>
+struct id_set
+{
+    /// \brief Return the id of \p key, first appending it to the set
+    /// (with the next sequential id) if it is not already present
+    unsigned insert_key(const K& key)
+    {
+        const typename k2id_t::const_iterator keyIter(_k2id.find(key));
+        if (keyIter != _k2id.end())
+        {
+            // already registered
+            return keyIter->second;
+        }
+
+        // new key: its id is its index in the id-ordered key list
+        const unsigned newId(_id2k.size());
+        _k2id[key]=newId;
+        _id2k.push_back(key);
+        return newId;
+    }
+
+    /// \brief Return true if \p key has been inserted
+    bool test_key(const K& key) const
+    {
+        return (_k2id.count(key) != 0);
+    }
+
+    /// \brief Get id of \p key, or an empty optional if the key is absent
+    boost::optional<unsigned>
+    get_optional_id(const K& key) const
+    {
+        const typename k2id_t::const_iterator keyIter(_k2id.find(key));
+        boost::optional<unsigned> result; // stays empty unless the key is found
+        if (keyIter != _k2id.end())
+        {
+            result = keyIter->second;
+        }
+        return result;
+    }
+
+    /// \brief Get id of \p key; throws blt_exception if the key is absent
+    unsigned get_id(const K& key) const
+    {
+        const typename k2id_t::const_iterator keyIter(_k2id.find(key));
+        if (_k2id.end() == keyIter)
+        {
+            throw blt_exception("ERROR: id_set.get_id(): invalid key\n");
+        }
+        return keyIter->second;
+    }
+
+    /// \brief Get the key assigned to \p id; throws blt_exception if no such id exists
+    const K& get_key(const unsigned id) const
+    {
+        if (! (id<_id2k.size()))
+        {
+            throw blt_exception("ERROR: id_set.get_key(): invalid id\n");
+        }
+        return _id2k[id];
+    }
+
+    bool
+    empty() const
+    {
+        return (_id2k.size() == 0);
+    }
+
+    unsigned
+    size() const
+    {
+        return static_cast<unsigned>(_id2k.size());
+    }
+
+    void
+    clear()
+    {
+        _id2k.clear();
+        _k2id.clear();
+    }
+
+private:
+    typedef std::map<K,unsigned,COMPARE> k2id_t;
+
+    k2id_t _k2id; // key -> id
+    std::vector<K> _id2k; // id -> key, indexed by id
+};
+
+
+
+/// \brief A map-like container which also assigns each distinct key a
+/// sequential id number, starting from 0 in insertion order
+///
+/// Values can be fetched directly by id number, while lookup by key
+/// remains available when required
+///
+template <typename K, typename V, typename COMPARE = std::less<K>>
+struct id_map
+{
+    /// \brief Store (key,value), replacing the value of an existing key,
+    /// and return the key's id
+    unsigned insert(const K& key, const V& value)
+    {
+        const typename k2id_t::const_iterator keyIter(_k2id.find(key));
+        if (keyIter != _k2id.end())
+        {
+            // known key: overwrite the stored pair in place, the id is unchanged
+            const unsigned oldId(keyIter->second);
+            _id2kv[oldId] = std::make_pair(key,value);
+            return oldId;
+        }
+
+        const unsigned newId(_id2kv.size());
+        _k2id[key]=newId;
+        _id2kv.push_back(std::make_pair(key,value));
+        return newId;
+    }
+
+    /// \brief Return true if \p key has been inserted
+    bool test_key(const K& key) const
+    {
+        return (_k2id.count(key) != 0);
+    }
+
+    /// \brief Get id of \p key, or an empty optional if the key is absent
+    boost::optional<unsigned>
+    get_optional_id(const K& key) const
+    {
+        const typename k2id_t::const_iterator keyIter(_k2id.find(key));
+        boost::optional<unsigned> result; // stays empty unless the key is found
+        if (keyIter != _k2id.end())
+        {
+            result = keyIter->second;
+        }
+        return result;
+    }
+
+    /// \brief Get id of \p key; throws blt_exception if the key is absent
+    unsigned get_id(const K& key) const
+    {
+        const typename k2id_t::const_iterator keyIter(_k2id.find(key));
+        if (_k2id.end() == keyIter)
+        {
+            throw blt_exception("ERROR: id_map.get_id(): invalid key\n");
+        }
+        return keyIter->second;
+    }
+
+    /// \brief Get the key assigned to \p id; throws blt_exception if no such id exists
+    const K& get_key(const unsigned id) const
+    {
+        if (! (id<_id2kv.size()))
+        {
+            throw blt_exception("ERROR: idmap.get_key(): invalid id\n");
+        }
+        return _id2kv[id].first;
+    }
+
+    /// \brief Get the value stored for \p id; throws blt_exception if no such id exists
+    const V& get_value(const unsigned id) const
+    {
+        if (! (id<_id2kv.size()))
+        {
+            throw blt_exception("ERROR: idmap.get_value(): invalid id\n");
+        }
+        return _id2kv[id].second;
+    }
+
+    bool
+    empty() const
+    {
+        return (_id2kv.size() == 0);
+    }
+
+    unsigned
+    size() const
+    {
+        return static_cast<unsigned>(_id2kv.size());
+    }
+
+    void
+    clear()
+    {
+        _id2kv.clear();
+        _k2id.clear();
+    }
+
+private:
+    typedef std::map<K,unsigned,COMPARE> k2id_t;
+
+    k2id_t _k2id; // key -> id
+    std::vector<std::pair<K,V>> _id2kv; // id -> (key,value), indexed by id
+};
diff --git a/src/c++/lib/blt_util/input_stream_handler.cpp b/src/c++/lib/blt_util/input_stream_handler.cpp
new file mode 100644
index 0000000..8575534
--- /dev/null
+++ b/src/c++/lib/blt_util/input_stream_handler.cpp
@@ -0,0 +1,196 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "blt_util/input_stream_handler.hh"
+
+#include <cstdlib>
+
+#include <iostream>
+#include <sstream>
+
+
+
+static
+const char*
+input_type_label(const INPUT_TYPE::index_t i)
+{
+    // Map an input type to its label for messages; any other value is fatal.
+    if (i == INPUT_TYPE::NONE)
+    {
+        return "NONE";
+    }
+    if (i == INPUT_TYPE::READ)
+    {
+        return "READ";
+    }
+
+    log_os << "ERROR: unrecognized event type.\n";
+    exit(EXIT_FAILURE);
+}
+
+
+
+void
+input_stream_data::
+register_error(const char* label,
+               const int sample_no) const
+{
+    // Report a duplicate registration for this sample, then terminate the process.
+    log_os << "ERROR: attempting to register " << label;
+    log_os << " with sample number: " << sample_no << " more than once\n";
+    exit(EXIT_FAILURE);
+}
+
+
+
+
+input_stream_handler::
+input_stream_handler(
+    const input_stream_data& data)
+    : _data(data)
+    , _is_end(false)
+    , _is_head_pos(false)
+    , _head_pos(0)
+{
+    // Seed _stream_queue with the first record of every registered read stream;
+    // the id_map id of each stream doubles as its submission order.
+    for (unsigned order(0); order<_data._reads.size(); ++order)
+    {
+        push_next(INPUT_TYPE::READ,_data._reads.get_key(order),order);
+    }
+}
+
+
+
+bool
+input_stream_handler::
+next()
+{
+    // Advance to the next record in merged position order. Returns false once every
+    // stream is exhausted (all later calls also return false); throws blt_exception
+    // if the new record lies before the highest position already returned.
+    if (_is_end) return false;
+
+    if (_current.itype != INPUT_TYPE::NONE)
+    {
+        // refill the queue from the stream that supplied the record being retired:
+        push_next(_current.itype,_current.sample_no,_current._order);
+        _last=_current;
+    }
+
+    if (_stream_queue.empty())
+    {
+        _current=input_record_info();
+        _is_end=true;
+        return false;
+    }
+
+    _current=_stream_queue.top();
+    _stream_queue.pop();
+
+    if (_is_head_pos &&
+        (_current.pos < _head_pos))
+    {
+        if (_current.itype == INPUT_TYPE::READ)
+        {
+            // report the offending record next to the one returned before it (positions printed +1):
+            std::ostringstream oss;
+            oss << "ERROR: unexpected read order:\n"
+                << "\tInput-record with pos/type/sample_no: "
+                << (_current.pos+1) << "/" << input_type_label(_current.itype) << "/" << _current.sample_no
+                << " follows pos/type/sample_no: "
+                << (_last.pos+1) << "/" << input_type_label(_last.itype) << "/" << _last.sample_no << "\n";
+            throw blt_exception(oss.str().c_str());
+        }
+        else
+        {
+            std::ostringstream oss;
+            oss << "ERROR: unexpected input type: " << _current.itype << "\n";
+            throw blt_exception(oss.str().c_str());
+        }
+    }
+
+    // track the highest position handed out so far:
+    if (_is_head_pos)
+    {
+        _head_pos=std::max(_head_pos,_current.pos);
+    }
+    else
+    {
+        _is_head_pos=true;
+        _head_pos=_current.pos;
+    }
+    return true;
+}
+
+
+
+static
+void
+get_next_read_pos(bool& is_next_read,
+                  pos_t& next_read_pos,
+                  bam_streamer& read_stream)
+{
+    // Advance read_stream by one record and report whether one was available;
+    // next_read_pos is that record's pos() minus one, or 0 when there is no record.
+    is_next_read=read_stream.next();
+    if (! is_next_read)
+    {
+        next_read_pos=0;
+        return;
+    }
+
+    const bam_record& read_rec(*(read_stream.get_record_ptr()));
+    next_read_pos=(read_rec.pos()-1);
+}
+
+
+
+void
+input_stream_handler::
+push_next(const INPUT_TYPE::index_t itype,
+          const int sample_no,
+          const unsigned order)
+{
+    // Pull the next record from the stream stored under order and queue its
+    // position; only READ inputs are supported, anything else throws.
+    if (itype != INPUT_TYPE::READ)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: unexpected input type: " << itype << "\n";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    bool is_next(false);
+    pos_t next_pos;
+    bam_streamer& read_stream(*(_data._reads.get_value(order)));
+    get_next_read_pos(is_next,next_pos,read_stream);
+    // a stream without a further record adds nothing to the queue:
+    if (! is_next) return;
+    _stream_queue.push(input_record_info(next_pos,itype,sample_no,order));
+}
+
diff --git a/src/c++/lib/blt_util/input_stream_handler.hh b/src/c++/lib/blt_util/input_stream_handler.hh
new file mode 100644
index 0000000..05c6b1f
--- /dev/null
+++ b/src/c++/lib/blt_util/input_stream_handler.hh
@@ -0,0 +1,169 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+///
+/// object which accepts as input bam (and formerly vcf/other chromosome ordered) files from multiple
+/// samples and merges them in order
+///
+/// \author Chris Saunders
+///
+
+
+#pragma once
+
+
+#include "blt_util/id_map.hh"
+#include "htsapi/bam_streamer.hh"
+
+#include <map>
+#include <queue>
+#include <utility>
+
+
+namespace INPUT_TYPE
+{
+enum index_t { NONE, READ };
+}
+
+struct input_stream_handler;
+
+/// all inputs to be merged are registered to this object first,
+/// it is then passed to the input_stream_handler constructor
+struct input_stream_data
+{
+    /// register a read stream under sample_no; only a pointer to bs is stored
+    void
+    register_reads(bam_streamer& bs,
+                   const int sample_no = 0)
+    {
+        if (_reads.test_key(sample_no)) register_error("reads",sample_no);
+        _reads.insert(sample_no,&bs);
+    }
+
+private:
+    /// log a duplicate registration of label for sample_no and exit the process
+    void
+    register_error(const char* label,
+                   const int sample_no) const;
+
+/////////// data:
+    friend struct input_stream_handler;
+    typedef id_map<int,bam_streamer*> reads_t;
+
+    reads_t _reads;
+};
+
+
+
+struct input_record_info
+{
+    input_record_info(const pos_t p = 0,
+                      const INPUT_TYPE::index_t t = INPUT_TYPE::NONE,
+                      const int i = 0,
+                      const unsigned s = 0)
+        :  pos(p), itype(t), sample_no(i), _order(s) {}
+
+    // comparison is deliberately reversed for pos, sample_no and _order:
+    // std::priority_queue keeps its greatest element on top, so the 'lower'
+    // values we'd like to see first must compare as greater; note that itype
+    // is compared in the forward direction
+    bool
+    operator<(const input_record_info& rhs) const
+    {
+        if (pos > rhs.pos) return true;
+        if (pos == rhs.pos)
+        {
+            if (itype < rhs.itype) return true;
+            if (itype==rhs.itype)
+            {
+                if (sample_no > rhs.sample_no) return true;
+                if (sample_no == rhs.sample_no)
+                {
+                    return (_order > rhs._order);
+                }
+            }
+        }
+        return false;
+    }
+
+    unsigned get_order() const
+    {
+        return _order;
+    }
+
+    pos_t pos;
+    INPUT_TYPE::index_t itype;
+    int sample_no;
+
+private:
+    friend struct input_stream_handler;
+
+    // record the submission order (last tie-breaker in operator<):
+    unsigned _order;
+};
+
+
+
+// streams the bam inputs registered in an input_stream_data object and
+// presents their records merged in positional order
+// NOTE(review): earlier wording described vcf inputs running ahead of the
+// reads; the INPUT_TYPE enum in this header only offers NONE and READ
+struct input_stream_handler
+{
+    input_stream_handler(
+        const input_stream_data& data);
+
+    bool next(); // advance to the next record, false once all inputs are exhausted
+
+    input_record_info
+    get_current() const
+    {
+        return _current; // record selected by the latest next() call
+    }
+
+    pos_t
+    get_head_pos() const
+    {
+        return _head_pos; // highest position returned so far
+    }
+
+private:
+    // queue the next record of the stream stored under order, if it has one
+    void
+    push_next(const INPUT_TYPE::index_t itype,
+              const int sample_no,
+              const unsigned order);
+
+
+///////////////////////////////// data:
+    const input_stream_data _data; // private copy of the registration data
+
+    input_record_info _current; // record returned by the latest next()
+    input_record_info _last; // record returned before _current
+
+    bool _is_end; // set once the queue has run empty
+
+    bool _is_head_pos; // false until the first record has been returned
+    pos_t _head_pos;
+
+    std::priority_queue<input_record_info> _stream_queue; // pending records, lowest position on top
+};
+
diff --git a/src/c++/lib/blt_util/io_util.cpp b/src/c++/lib/blt_util/io_util.cpp
new file mode 100644
index 0000000..3580c7c
--- /dev/null
+++ b/src/c++/lib/blt_util/io_util.cpp
@@ -0,0 +1,67 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/io_util.hh"
+
+#include "blt_util/blt_exception.hh"
+
+#include <cstdlib>
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+
+
+void
+open_ifstream(
+    std::ifstream& ifs,
+    const char* filename)
+{
+    // Open filename on ifs; throw blt_exception if the stream is not usable afterwards.
+    ifs.open(filename);
+    if (ifs) return;
+
+    std::ostringstream oss;
+    oss << "ERROR: Can't open file: " << filename << "\n";
+    throw blt_exception(oss.str().c_str());
+}
+
+
+
+StreamScoper::
+StreamScoper(
+    std::ostream& os)
+    : _os(os), _tmp_os(new std::ofstream) // _tmp_os is never opened here, it only stores formatting state
+{
+    _tmp_os->copyfmt(_os); // snapshot the formatting of os at construction time
+}
+
+
+
+StreamScoper::
+~StreamScoper()
+{
+    _os.copyfmt(*_tmp_os); // put back the formatting captured by the constructor
+}
diff --git a/src/c++/lib/blt_util/io_util.hh b/src/c++/lib/blt_util/io_util.hh
new file mode 100644
index 0000000..df6239e
--- /dev/null
+++ b/src/c++/lib/blt_util/io_util.hh
@@ -0,0 +1,52 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <iosfwd>
+#include <memory>
+
+
+void
+open_ifstream(
+    std::ifstream& ifs,
+    const char* filename);
+
+
+/// use this class to set scope specific stream formatting: the format state
+/// of os is copied on construction and copied back on destruction
+///
+/// see unit test for example usage
+struct StreamScoper
+{
+    explicit
+    StreamScoper(
+        std::ostream& os);
+
+    ~StreamScoper();
+
+private:
+    std::ostream& _os; // the stream being guarded
+    std::unique_ptr<std::ofstream> _tmp_os; // holds the saved format state
+};
diff --git a/src/c++/lib/blt_util/istream_line_splitter.cpp b/src/c++/lib/blt_util/istream_line_splitter.cpp
new file mode 100644
index 0000000..80a6922
--- /dev/null
+++ b/src/c++/lib/blt_util/istream_line_splitter.cpp
@@ -0,0 +1,161 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+///
+/// an efficient (and slightly unsafe) class for basic tab-delimited files, etc...
+///
+
+/// \author Chris Saunders
+///
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/istream_line_splitter.hh"
+
+#include <cassert>
+#include <cstring>
+
+#include <iostream>
+#include <sstream>
+
+
+
+void
+istream_line_splitter::
+write_line(std::ostream& os) const
+{
+    // Emit the stored words joined by the separator, then a newline.
+    if (_n_word != 0) os << word[0];
+    for (unsigned wordIndex(1); wordIndex<_n_word; ++wordIndex)
+        os << _sep << word[wordIndex];
+
+    os << "\n";
+}
+
+
+
+void
+istream_line_splitter::
+dump(std::ostream& os) const
+{
+    // Debug report: the current line number followed by the re-joined line.
+    os << "\tline_no: " << _line_no << "\n" << "\tline: ";
+    write_line(os);
+}
+
+
+
+void
+istream_line_splitter::
+increase_buffer_size()
+{
+    // Double the line buffer, keeping the bytes read so far.
+    assert(_buf_size>1);
+    char* new_buf(new char[_buf_size*2]); // allocate first: if new[] throws, _buf and _buf_size still agree
+    memcpy(new_buf,_buf,(_buf_size-1)*sizeof(char));
+    delete [] _buf;
+    _buf=new_buf;
+    _buf_size *= 2;
+}
+
+
+
+static
+bool
+check_istream(std::istream& is,    // stream that getline() was just called on
+              unsigned& line_no)   // incremented only when the read left the stream in a good state
+{
+    if (is)
+    {
+        line_no++;
+        // regular successful line read:
+        return true;
+    }
+    // the stream is in a failed state from here on: tell end of input, a hard error and a full buffer apart
+    if     (is.eof()) return false; // end of input is the only case reported as false
+    else if (is.fail())
+    {
+        if (is.bad())
+        {
+            std::ostringstream oss;
+            oss << "ERROR: unexpected failure while attempting to read line " << (line_no+1) << "\n";
+            throw blt_exception(oss.str().c_str());
+        }
+        is.clear(); // reset the failure flags so reading can continue
+    }
+    // line_no is left untouched on this path, which lets the caller detect the incomplete line
+    // incomplete line read in this case, have to increase buffer size:
+    return true;
+}
+
+
+
+bool
+istream_line_splitter::
+parse_line()
+{
+    _n_word=0; // no words are reported if this call returns false or throws
+    _is.getline(_buf,_buf_size); // read one line, or as much of it as the buffer can hold
+    const unsigned previous_line_no(_line_no);
+    if (! check_istream(_is,_line_no)) return false; // normal eof
+    unsigned buflen(strlen(_buf));
+    // a buffer filled to capacity with _line_no unchanged means the line was cut short: grow and keep reading
+    while (((buflen+1) == _buf_size) && (previous_line_no==_line_no))
+    {
+        increase_buffer_size();
+        _is.getline(_buf+buflen,_buf_size-buflen);
+        if (! check_istream(_is,_line_no))
+        {
+            std::ostringstream oss;
+            oss << "ERROR: Unexpected read failure in parse_line() at line_no: " << _line_no << "\n";
+            throw blt_exception(oss.str().c_str());
+        }
+        buflen=(strlen(_buf));
+    }
+    // sanity check: the string held in the buffer must fit within its capacity
+    if ((buflen+1) >_buf_size)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Unexpected read failure in parse_line() at line_no: " << _line_no << "\n";
+        throw blt_exception(oss.str().c_str());
+    }
+    // NOTE(review): strlen(_buf) above already dereferenced _buf, so this null test cannot guard it
+    if (nullptr == _buf) return false;
+    assert(buflen); // NOTE(review): buflen is 0 for an empty input line, which this rejects -- confirm intent
+
+    // do a low-level separator parse, words are split in place inside the buffer:
+    {
+        char* p(_buf);
+        word[0]=p;
+        unsigned i(1);
+        while (i<_max_word) // once _max_word words exist the last word keeps the rest of the line
+        {
+            if ((*p == '\n') || (*p == '\0')) break;
+            if (*p == _sep)
+            {
+                *p = '\0'; // terminate the current word
+                word[i++] = p+1;
+            }
+            ++p;
+        }
+        _n_word=i;
+    }
+    return true;
+}
diff --git a/src/c++/lib/blt_util/istream_line_splitter.hh b/src/c++/lib/blt_util/istream_line_splitter.hh
new file mode 100644
index 0000000..24095bf
--- /dev/null
+++ b/src/c++/lib/blt_util/istream_line_splitter.hh
@@ -0,0 +1,130 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+///
+/// an efficient (and slightly unsafe) class for basic tab-delimited files, etc...
+///
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <iosfwd>
+
+
+struct istream_line_splitter
+{
+    // splits each line read from 'is' into at most max_word words separated by word_seperator
+    istream_line_splitter(std::istream& is,
+                          const unsigned line_buf_size=8*1024,
+                          const char word_seperator='\t',
+                          const unsigned max_word=0)
+        : _is(is)
+        , _line_no(0)
+        , _n_word(0)
+        , _buf_size(line_buf_size)
+        , _sep(word_seperator)
+        , _max_word(max_word)
+        , _buf(new char[_buf_size])
+    {
+        // 0 or an oversized request selects the built-in word limit:
+        if ((0==_max_word) || (MAX_WORD_COUNT < _max_word))
+        {
+            _max_word=MAX_WORD_COUNT;
+        }
+    }
+
+    // copying would double-delete _buf, so it is disabled (assignment is already
+    // implicitly deleted by the reference member _is):
+    istream_line_splitter(const istream_line_splitter&) = delete;
+
+    ~istream_line_splitter()
+    {
+        delete [] _buf; // delete [] on a null pointer is a no-op
+    }
+
+    unsigned
+    n_word() const
+    {
+        return _n_word;
+    }
+
+    /// returns false for regular end of input:
+    bool
+    parse_line();
+
+    // recreates the line before parsing
+    void
+    write_line(std::ostream& os) const;
+
+    // debug output, which provides line number and other info before calling write_line
+    void
+    dump(std::ostream& os) const;
+
+    // word[0..n_word()-1] point into the internal line buffer, which the next parse_line() call overwrites
+    enum { MAX_WORD_COUNT = 50 };
+    char* word[MAX_WORD_COUNT];
+private:
+    // double the capacity of _buf, keeping its contents
+    void
+    increase_buffer_size();
+
+    std::istream& _is;
+    unsigned _line_no;
+    unsigned _n_word;
+    unsigned _buf_size;
+    char _sep;
+    unsigned _max_word;
+    char* _buf;
+};
+
+
+
+#if 0
+{
+    //usage example:
+    istream_line_splitter dparse(data_is);
+
+    while (dparse.parse_line())
+    {
+        static const unsigned col_count(46);
+        if (dparse.n_word()!=col_count)
+        {
+            std::ostringstream oss;
+            oss << "ERROR: unexpected number of columns in paired export line:\n\n";
+            dparse.dump(oss);
+            throw blt_exception(oss.str().c_str());
+        }
+
+        for (unsigned i(1); (i+1)<col_count; ++i)
+        {
+            dparse.word[i][strlen(dparse.word[i])] = sep;
+        }
+        const char* nocompress_segment(dparse.word[0]);
+        const char* compress_segment(dparse.word[1]);
+
+        /// ....etc
+    }
+}
+#endif
+
+
diff --git a/src/c++/lib/blt_util/known_pos_range2.cpp b/src/c++/lib/blt_util/known_pos_range2.cpp
new file mode 100644
index 0000000..4c4306d
--- /dev/null
+++ b/src/c++/lib/blt_util/known_pos_range2.cpp
@@ -0,0 +1,41 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/known_pos_range2.hh"
+
+#include <iostream>
+
+
+// output is the right-open interval "[begin_pos,end_pos)", both values are
+// written exactly as stored (no index conversion is applied here):
+std::ostream& operator<<(std::ostream& os, const known_pos_range2& pr)
+{
+    os << '['
+       << pr.begin_pos()
+       << ','
+       << pr.end_pos()
+       << ')';
+
+    return os;
+}
diff --git a/src/c++/lib/blt_util/known_pos_range2.hh b/src/c++/lib/blt_util/known_pos_range2.hh
new file mode 100644
index 0000000..cfd4c3c
--- /dev/null
+++ b/src/c++/lib/blt_util/known_pos_range2.hh
@@ -0,0 +1,224 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+///
+/// this is the beginning of a redesign to known_pos_range
+/// to be more efficient for the manta case
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+
+#include "boost/serialization/level.hpp"
+
+#include <algorithm>
+#include <iosfwd>
+
+
+/// \brief integer ranges which are right open
+/// (begin_pos is inside the range, end_pos is one past its last position)
+struct known_pos_range2
+{
+    known_pos_range2() :
+        known_pos_range2(0,0)
+    {}
+
+    known_pos_range2(
+        const pos_t bp,
+        const pos_t ep) :
+        _begin_pos(bp),
+        _end_pos(ep)
+    {}
+
+    void
+    set_begin_pos(const pos_t pos)
+    {
+        _begin_pos=pos;
+    }
+
+    void
+    set_end_pos(const pos_t pos)
+    {
+        _end_pos=pos;
+    }
+
+    void
+    set_range(const pos_t begin,
+              const pos_t end)
+    {
+        set_begin_pos(begin);
+        set_end_pos(end);
+    }
+
+    /// expand range on both sides, or contract it for a negative expandSize
+    void
+    expandBy(
+        const pos_t expandSize)
+    {
+        _begin_pos-=expandSize;
+        _end_pos+=expandSize;
+        if ((expandSize<0) && (_end_pos < _begin_pos))
+        {
+            // contracted past empty: collapse to an empty range at the midpoint
+            _begin_pos = (_begin_pos+_end_pos)/2;
+            _end_pos = _begin_pos;
+        }
+    }
+
+    /// shift range position
+    void
+    offsetBy(
+        const pos_t offsetSize)
+    {
+        _begin_pos += offsetSize;
+        _end_pos += offsetSize;
+    }
+
+    pos_t
+    begin_pos() const
+    {
+        return _begin_pos;
+    }
+
+    pos_t
+    end_pos() const
+    {
+        return _end_pos;
+    }
+
+    pos_t
+    center_pos() const
+    {
+        return _begin_pos + ((std::max(size(),1u)-1)/2); // a range of size 0 reports begin_pos
+    }
+
+    bool
+    is_pos_intersect(const pos_t pos) const
+    {
+        return ((pos >= _begin_pos) &&
+                (pos < _end_pos));
+    }
+
+    bool
+    is_range_intersect(const known_pos_range2& pr) const
+    {
+        return ((pr._end_pos > _begin_pos) &&
+                (pr._begin_pos < _end_pos));
+    }
+
+    /// does this range completely overlap pr?
+    bool
+    is_superset_of(const known_pos_range2& pr) const
+    {
+        return
+            ((pr._end_pos <= _end_pos) &&
+             (pr._begin_pos >= _begin_pos));
+    }
+
+    unsigned
+    size() const
+    {
+        return std::max(0,_end_pos-_begin_pos); // a range with end before begin reports 0
+    }
+
+    bool
+    operator<(const known_pos_range2& rhs) const
+    {
+        if (_begin_pos < rhs._begin_pos) return true;
+        if (_begin_pos != rhs._begin_pos) return false;
+        return (_end_pos < rhs._end_pos);
+    }
+
+    bool
+    operator==(const known_pos_range2& rhs) const
+    {
+        return ((_begin_pos==rhs._begin_pos) && (_end_pos==rhs._end_pos));
+    }
+
+    // expand range to the extent of a second range:
+    void
+    merge_range(const known_pos_range2& kpr)
+    {
+        if (kpr._begin_pos<_begin_pos) _begin_pos=kpr._begin_pos;
+        if (kpr._end_pos>_end_pos) _end_pos=kpr._end_pos;
+    }
+
+    void
+    clear()
+    {
+        _begin_pos=0;
+        _end_pos=0;
+    }
+
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& _begin_pos& _end_pos;
+    }
+
+private:
+    pos_t _begin_pos;
+    pos_t _end_pos;
+};
+
+
+
+/// return the smallest single range spanning both input ranges:
+inline
+known_pos_range2
+merge_range(
+    const known_pos_range2& kpr1,
+    const known_pos_range2& kpr2)
+{
+    const pos_t mergedBegin(std::min(kpr1.begin_pos(),kpr2.begin_pos()));
+    const pos_t mergedEnd(std::max(kpr1.end_pos(),kpr2.end_pos()));
+
+    return known_pos_range2(mergedBegin,mergedEnd);
+}
+
+
+/// windowed intersection test
+///
+/// each range's end is shifted by windowSize before the usual right-open
+/// overlap comparison, so windowSize 0 gives a regular intersection test and
+/// a negative windowSize requires the ranges to overlap by more than that amount
+///
+/// For example, with windowSize 100 two ranges separated by a gap below 100 intersect
+inline
+bool
+is_intersect_window(
+    const known_pos_range2& kpr1,
+    const known_pos_range2& kpr2,
+    const pos_t windowSize = 0)
+{
+    if ((kpr1.end_pos()+windowSize) <= kpr2.begin_pos()) return false;
+
+    return ((kpr2.end_pos()+windowSize) > kpr1.begin_pos());
+}
+
+std::ostream& operator<<(std::ostream& os, const known_pos_range2& pr);
+
+BOOST_CLASS_IMPLEMENTATION(known_pos_range2, boost::serialization::object_serializable)
+
diff --git a/src/c++/lib/blt_util/log.cpp b/src/c++/lib/blt_util/log.cpp
new file mode 100644
index 0000000..670d4c9
--- /dev/null
+++ b/src/c++/lib/blt_util/log.cpp
@@ -0,0 +1,30 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+
+/// \author Chris Saunders
+///
+#include "blt_util/log.hh"
+
+#include <iostream>
+
+std::ostream& log_os(std::cerr);
+
diff --git a/src/c++/lib/blt_util/log.hh b/src/c++/lib/blt_util/log.hh
new file mode 100644
index 0000000..b57005c
--- /dev/null
+++ b/src/c++/lib/blt_util/log.hh
@@ -0,0 +1,29 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <iosfwd>
+
+extern std::ostream& log_os;
diff --git a/src/c++/lib/blt_util/math_util.hh b/src/c++/lib/blt_util/math_util.hh
new file mode 100644
index 0000000..36f41ed
--- /dev/null
+++ b/src/c++/lib/blt_util/math_util.hh
@@ -0,0 +1,127 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "boost/math/special_functions/log1p.hpp"
+
+#include <cmath>
+
+#include <algorithm>
+
+
+/// returns log(1+x), switching to boost::math::log1p when abs(x) is below a
+/// fixed threshold and to std::log(1+x) otherwise
+template <typename FloatType>
+FloatType
+log1p_switch(const FloatType x)
+{
+    // the threshold is a provisional choice, a better value may exist
+    static const FloatType smallx_thresh(0.01);
+    const bool isSmallX(std::abs(x)<smallx_thresh);
+
+    if (! isSmallX)
+    {
+        return std::log(1+x);
+    }
+
+    // small-magnitude branch:
+    return boost::math::log1p(x);
+}
+
+
+/// returns equiv of log(exp(x1)+exp(x2)), evaluated relative to the larger
+/// of the two arguments
+template <typename FloatType>
+FloatType
+log_sum(FloatType x1, FloatType x2)
+{
+    const bool isSwapped(x1<x2);
+    return (isSwapped ? x2 : x1) + log1p_switch(std::exp(isSwapped ? (x1-x2) : (x2-x1)));
+}
+
+
+// helper for median() below, selects the middle element with std::nth_element
+template <typename Iter>
+typename std::iterator_traits<Iter>::value_type
+_ne_median(
+    Iter begin,
+    Iter end)
+{
+    assert(begin != end);
+    const Iter mid(begin+(std::distance(begin,end)/2));
+    std::nth_element(begin,mid,end);
+    return *mid;
+}
+
+// helper for median() below, selects the middle element with std::partial_sort
+template <typename Iter>
+typename std::iterator_traits<Iter>::value_type
+_ps_median(
+    Iter begin,
+    Iter end)
+{
+    assert(begin != end);
+    const Iter mid(begin+(std::distance(begin,end)/2));
+    std::partial_sort(begin,mid+1,end);
+    return *mid;
+}
+
+
+/// returns the element at rank size/2 of the range (the upper median for even
+/// sizes), elements in the specified range are partially reordered
+template <typename Iter>
+typename std::iterator_traits<Iter>::value_type
+median(
+    Iter begin,
+    Iter end)
+{
+    // BROKEN_NTH_ELEMENT selects a partial_sort fallback because of a common gcc bug:
+    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800
+
+#ifdef BROKEN_NTH_ELEMENT
+    return _ps_median(begin,end);
+#else
+    // nth_element is the preferred way to do it:
+    return _ne_median(begin,end);
+#endif
+}
+
+
+/// division with a standardized guard against a zero denominator
+///
+/// this small utility replaces many ad hoc copies with minor type variations;
+/// the denominator is converted to double and a zero denominator yields 0,
+/// which should do the sane thing for all integral and floating point types
+/// no wider than double
+///
+template <typename A, typename B>
+double
+safeFrac(
+    const A a, const B b)
+{
+    const double denom(static_cast<double>(b));
+    const bool isZeroDenom((denom<=0.) && (denom>=0));
+    return (isZeroDenom ? 0. : (a/denom));
+}
diff --git a/src/c++/lib/blt_util/observer.hh b/src/c++/lib/blt_util/observer.hh
new file mode 100644
index 0000000..c03b481
--- /dev/null
+++ b/src/c++/lib/blt_util/observer.hh
@@ -0,0 +1,168 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+///
+/// \brief simple observer/notifier pattern
+///
+/// see unit test for demonstration, note this is not meant to be used across threads
+///
+
+#pragma once
+
+#include <set>
+
+
+
+template <typename T>
+struct notifier;
+
+
+/// \brief receiving half of the observer/notifier pair
+///
+/// A derived class subscribes to a notifier<T> through observe_notifier() and
+/// implements recieve_notification() to handle its messages. Every notifier
+/// being observed is tracked so that it can be told to drop this observer
+/// when the observer is destroyed.
+///
+template <typename T>
+struct observer
+{
+    friend struct notifier<T>;
+
+    typedef observer self_t;
+
+    observer() {}
+
+    /// a copy starts out observing nothing: the notifier set is not copied
+    observer(const self_t&)  {}
+
+    /// detach this observer from every notifier it is registered with
+    virtual ~observer()
+    {
+        for (const notifier<T>* notPtr : _nots)
+        {
+            notPtr->unregister_observer(this);
+        }
+    }
+
+protected:
+    /// start observing n: registers with n and records it in the local notifier set
+    void
+    observe_notifier(const notifier<T>& n)
+    {
+        n.register_observer(this);
+        _nots.insert(&n);
+    }
+
+private:
+    // assignment is declared but not defined
+    self_t& operator=(const self_t&);
+
+    /// message handler invoked by notifier<T>::notify_observers()
+    virtual void
+    recieve_notification(const notifier<T>&,
+                         const T&) = 0;
+
+    /// called by a notifier which has taken a copy of another notifier's observer set
+    void
+    register_notifier(const notifier<T>* n) const
+    {
+        _nots.insert(n);
+    }
+
+    /// called by a notifier which is dropping this observer; no-op if n is not tracked
+    void
+    unregister_notifier(const notifier<T>* n)
+    {
+        _nots.erase(n);
+    }
+
+    ////////// data:
+    typedef typename std::set<const notifier<T>*> nots_t;
+    mutable nots_t _nots;
+};
+
+
+/// \brief sending half of the observer/notifier pair
+///
+/// Holds the set of observers to be messaged by notify_observers(). Copying or
+/// assigning a notifier copies its observer set and registers the new notifier
+/// with each of those observers; destruction unregisters it from all of them.
+///
+template <typename T>
+struct notifier
+{
+    friend struct observer<T>;
+
+    typedef notifier self_t;
+
+    notifier() {}
+
+    notifier(const self_t& rhs) :
+        _obss(rhs._obss)
+    {
+        self_register();
+    }
+
+    self_t&
+    operator=(const self_t& rhs)
+    {
+        if (this != &rhs)
+        {
+            // detach from the current observers before adopting those of rhs
+            self_unregister();
+            _obss=rhs._obss;
+            self_register();
+        }
+        return *this;
+    }
+
+    virtual ~notifier()
+    {
+        self_unregister();
+    }
+
+protected:
+    /// deliver msg to every registered observer
+    void
+    notify_observers(const T& msg) const
+    {
+        for (observer<T>* obsPtr : _obss)
+        {
+            obsPtr->recieve_notification(*this, msg);
+        }
+    }
+
+private:
+
+    /// tell every observer in the set to track this notifier
+    void
+    self_register() const
+    {
+        for (observer<T>* obsPtr : _obss)
+        {
+            obsPtr->register_notifier(this);
+        }
+    }
+
+    /// tell every observer in the set to stop tracking this notifier
+    void
+    self_unregister() const
+    {
+        for (observer<T>* obsPtr : _obss)
+        {
+            obsPtr->unregister_notifier(this);
+        }
+    }
+
+    void
+    register_observer(observer<T>* n) const
+    {
+        _obss.insert(n);
+    }
+
+    /// no-op if n is not in the observer set
+    void
+    unregister_observer(observer<T>* n) const
+    {
+        _obss.erase(n);
+    }
+
+    ////////// data:
+    typedef typename std::set<observer<T>*> obss_t;
+    mutable obss_t _obss;
+};
+
diff --git a/src/c++/lib/blt_util/parse_util.cpp b/src/c++/lib/blt_util/parse_util.cpp
new file mode 100644
index 0000000..78ff306
--- /dev/null
+++ b/src/c++/lib/blt_util/parse_util.cpp
@@ -0,0 +1,267 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/parse_util.hh"
+
+#include "boost/spirit/include/qi.hpp"
+
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+
+#include <limits>
+#include <sstream>
+
+
+
+/// throw a blt_exception reporting that parse_str could not be parsed as type_label
+static
+void
+parse_exception(const char* type_label,
+                const char* parse_str)
+{
+    std::ostringstream oss;
+    oss << "ERROR: Can't parse " << type_label;
+    oss << " from string: '" << parse_str << "'";
+    const std::string msg(oss.str());
+    throw blt_exception(msg.c_str());
+}
+
+
+namespace illumina
+{
+namespace blt_util
+{
+
+/// parse a base-10 unsigned value from the front of s using strtoul
+///
+/// \param s null-terminated text to parse
+/// \param s_out set to the first character following the parsed value
+///
+/// throws (via parse_exception) if strtoul reports an error, consumes no
+/// characters, or yields a value larger than the unsigned maximum
+///
+static
+unsigned
+parse_unsigned_core(
+    const char* s,
+    const char*& s_out)
+{
+    static const int base(10);
+
+    // strtoul reports failure through errno, so start from a clean state
+    errno = 0;
+
+    char* endptr;
+    const unsigned long val(strtoul(s, &endptr, base));
+
+    const bool isRangeError((errno == ERANGE) && ((val == ULONG_MAX) || (val == 0)));
+    const bool isOtherError((errno != 0) && (val == 0));
+    const bool isNothingParsed(endptr == s);
+    if (isRangeError || isOtherError || isNothingParsed)
+    {
+        parse_exception("unsigned long",s);
+    }
+
+    // the value parsed as unsigned long must also fit in unsigned
+    if (val > std::numeric_limits<unsigned>::max())
+    {
+        parse_exception("unsigned",s);
+    }
+
+    s_out = endptr;
+
+    return static_cast<unsigned>(val);
+}
+
+/// parse an unsigned value from the front of s and advance s past the parsed text
+unsigned
+parse_unsigned(
+    const char*& s)
+{
+    const char* const s_begin(s);
+    return parse_unsigned_core(s_begin,s);
+}
+
+/// parse the whole of s as an unsigned value; throws if anything follows the value
+unsigned
+parse_unsigned_rvalue(
+    const char* s)
+{
+    const char* parse_end(nullptr);
+    const unsigned val(parse_unsigned_core(s,parse_end));
+
+    // the parse must have stopped at the string terminator
+    const bool isFullyParsed(*parse_end == '\0');
+    if (! isFullyParsed)
+    {
+        parse_exception("unsigned",s);
+    }
+    return val;
+}
+
+/// std::string front end to parse_unsigned_rvalue()
+unsigned
+parse_unsigned_str(
+    const std::string& s)
+{
+    const char* const cstr(s.c_str());
+    return parse_unsigned_rvalue(cstr);
+}
+
+
+
+/// parse an int from the front of s by parsing a long and range-checking the result
+///
+/// \param s null-terminated text to parse
+/// \param s_out set to the first character following the parsed value
+///
+/// throws (via parse_exception) if the parsed value lies outside the range of int;
+/// parse_long() is used for the underlying parse
+///
+static
+int
+parse_int_core(
+    const char* s,
+    const char*& s_out)
+{
+    const char* endptr(s);
+    const long val(parse_long(endptr));
+
+    const bool isAboveMax(val > std::numeric_limits<int>::max());
+    const bool isBelowMin(val < std::numeric_limits<int>::min());
+    if (isAboveMax || isBelowMin)
+    {
+        parse_exception("int",s);
+    }
+
+    s_out = endptr;
+
+    return static_cast<int>(val);
+}
+
+/// parse an int from the front of s and advance s past the parsed text
+int
+parse_int(
+    const char*& s)
+{
+    const char* const s_begin(s);
+    return parse_int_core(s_begin,s);
+}
+
+/// parse the whole of s as an int; throws if anything follows the value
+int
+parse_int_rvalue(
+    const char* s)
+{
+    const char* parse_end(nullptr);
+    const int val(parse_int_core(s,parse_end));
+
+    // the parse must have stopped at the string terminator
+    const bool isFullyParsed(*parse_end == '\0');
+    if (! isFullyParsed)
+    {
+        parse_exception("int",s);
+    }
+    return val;
+}
+
+/// std::string front end to parse_int_rvalue()
+int
+parse_int_str(
+    const std::string& s)
+{
+    const char* const cstr(s.c_str());
+    return parse_int_rvalue(cstr);
+}
+
+
+
+/// parse a base-10 long value from the front of s using strtol
+///
+/// \param s null-terminated text to parse
+/// \param s_out set to the first character following the parsed value
+///
+/// throws (via parse_exception) if strtol reports an error or consumes no characters
+///
+static
+long
+parse_long_core(
+    const char* s,
+    const char*& s_out)
+{
+    static const int base(10);
+
+    // strtol reports failure through errno, so start from a clean state
+    errno = 0;
+
+    char* endptr;
+    const long val(strtol(s, &endptr, base));
+
+    const bool isRangeError((errno == ERANGE) && ((val == LONG_MIN) || (val == LONG_MAX)));
+    const bool isOtherError((errno != 0) && (val == 0));
+    const bool isNothingParsed(endptr == s);
+    if (isRangeError || isOtherError || isNothingParsed)
+    {
+        parse_exception("long int",s);
+    }
+
+    s_out = endptr;
+
+    return val;
+}
+
+/// parse a long from the front of s and advance s past the parsed text
+long
+parse_long(
+    const char*& s)
+{
+    const char* const s_begin(s);
+    return parse_long_core(s_begin,s);
+}
+
+/// parse the whole of s as a long; throws if anything follows the value
+long
+parse_long_rvalue(
+    const char* s)
+{
+    const char* parse_end(nullptr);
+    const long val(parse_long_core(s,parse_end));
+
+    // the parse must have stopped at the string terminator
+    const bool isFullyParsed(*parse_end == '\0');
+    if (! isFullyParsed)
+    {
+        parse_exception("long int",s);
+    }
+    return val;
+}
+
+/// std::string front end to parse_long_rvalue()
+long
+parse_long_str(
+    const std::string& s)
+{
+    const char* const cstr(s.c_str());
+    return parse_long_rvalue(cstr);
+}
+
+
+
+/// parse a double from the front of the text starting at s
+///
+/// \param s start of the text to parse
+/// \param s_out on success, advanced past the characters consumed by the parse
+/// \param s_end end of the text, or nullptr to treat s as a null-terminated string
+///
+/// throws (via parse_exception) if the parse fails or consumes no characters
+///
+static
+double
+parse_double_core(
+    const char* s,
+    const char*& s_out,
+    const char* s_end)
+{
+    double val(0.);
+    s_out = s;
+    if (s_end == nullptr) s_end=s+std::strlen(s);
+    bool isPass(boost::spirit::qi::parse(s_out, s_end, boost::spirit::double_, val));
+    if (isPass)
+    {
+        // a successful parse must also have consumed at least one character
+        isPass = (s != s_out);
+    }
+    if (! isPass)
+    {
+        parse_exception("double",s);
+    }
+    return val;
+}
+
+/// parse a double from the front of s and advance s past the parsed text
+///
+/// s_end optionally marks the end of the text, see parse_double_core()
+double
+parse_double(
+    const char*& s,
+    const char* s_end)
+{
+    const char* const s_begin(s);
+    return parse_double_core(s_begin,s,s_end);
+}
+
+/// parse the whole of the text starting at s as a double
+///
+/// \param s start of the text to parse
+/// \param s_end end of the text, or nullptr to treat s as a null-terminated string
+///
+/// throws (via parse_exception) if the text cannot be parsed, or if the parse stops
+/// before reaching s_end or a string terminator
+///
+double
+parse_double_rvalue(
+    const char* s,
+    const char* s_end)
+{
+    const char* s_tmp(0);
+    const double val(parse_double_core(s,s_tmp,s_end));
+
+    // When the caller supplies s_end the text need not be null-terminated there, so
+    // stopping exactly at s_end is a complete parse and s_end is never dereferenced.
+    // Without s_end (nullptr) this reduces to the string terminator test.
+    if ((s_tmp != s_end) && (*s_tmp != '\0'))
+    {
+        parse_exception("double",s);
+    }
+    return val;
+}
+
+/// std::string front end to parse_double_rvalue(), passing the string's end explicitly
+double
+parse_double_str(
+    const std::string& s)
+{
+    const char* const s_begin(s.c_str());
+    return parse_double_rvalue(s_begin,s_begin+s.size());
+}
+
+}
+}
diff --git a/src/c++/lib/blt_util/parse_util.hh b/src/c++/lib/blt_util/parse_util.hh
new file mode 100644
index 0000000..a1a97d9
--- /dev/null
+++ b/src/c++/lib/blt_util/parse_util.hh
@@ -0,0 +1,152 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <string>
+
+
+namespace illumina
+{
+namespace blt_util
+{
+
+/// parse c-string to TYPE
+///
+/// tolerates a non-TYPE suffix, but a non-empty prefix must be parsable as a TYPE,
+/// on completion the value of s will reflect the extent of the parse
+///
+/// an exception is thrown if no prefix of s can be parsed as TYPE; the integer
+/// versions also throw if the parsed value is out of range for TYPE
+///
+/// parse_double only: if available, specify s_end for minor performance improvement
+/// (in case of extremely large string), otherwise s must be null-terminated
+///
+unsigned
+parse_unsigned(
+    const char*& s);
+
+int
+parse_int(
+    const char*& s);
+
+long
+parse_long(
+    const char*& s);
+
+double
+parse_double(
+    const char*& s,
+    const char* s_end = nullptr);
+
+
+/// parse c-string to TYPE
+///
+/// similar to above functions but:
+/// - entire string must be convertible, no trailing suffix is allowed
+///   (an exception is thrown if any text remains after the parsed value)
+/// - appropriate for rvalue char pointers: s is taken by value and the extent
+///   of the parse is not reported back to the caller
+///
+unsigned
+parse_unsigned_rvalue(
+    const char* s);
+
+int
+parse_int_rvalue(
+    const char* s);
+
+long
+parse_long_rvalue(
+    const char* s);
+
+double
+parse_double_rvalue(
+    const char* s,
+    const char* s_end = nullptr);
+
+
+/// parse std::string to TYPE
+///
+/// entire string must be convertible, no trailing suffix is allowed
+///
+/// these forward to the corresponding parse_TYPE_rvalue function
+///
+unsigned
+parse_unsigned_str(
+    const std::string& s);
+
+int
+parse_int_str(
+    const std::string& s);
+
+long
+parse_long_str(
+    const std::string& s);
+
+double
+parse_double_str(
+    const std::string& s);
+
+
+
+/// template version:
+///
+/// parse_type<T>(s) forwards to the parse function for T, with the same
+/// suffix-tolerant behavior: s is advanced to reflect the extent of the parse.
+///
+/// Specializations are provided for unsigned, int, long and double. The primary
+/// template exists only to give a compile-time error for any other type.
+///
+template <typename T>
+T
+parse_type(const char*&)
+{
+    // sizeof(T)==0 is never true, but depends on T so it is only evaluated on instantiation
+    static_assert(sizeof(T)==0, "no parse specialization available for type T");
+    return T();
+}
+
+
+/// unsigned specialization, forwards to parse_unsigned()
+template <>
+inline
+unsigned
+parse_type<unsigned>(const char*& s)
+{
+    return parse_unsigned(s);
+}
+
+/// int specialization, forwards to parse_int()
+template <>
+inline
+int
+parse_type<int>(const char*& s)
+{
+    return parse_int(s);
+}
+
+/// long specialization, forwards to parse_long()
+template <>
+inline
+long
+parse_type<long>(const char*& s)
+{
+    return parse_long(s);
+}
+
+/// double specialization, forwards to parse_double() without an explicit end pointer
+template <>
+inline
+double
+parse_type<double>(const char*& s)
+{
+    return parse_double(s);
+}
+
+
+}
+}
+
diff --git a/src/c++/lib/blt_util/pos_processor_base.hh b/src/c++/lib/blt_util/pos_processor_base.hh
new file mode 100644
index 0000000..4a60f9c
--- /dev/null
+++ b/src/c++/lib/blt_util/pos_processor_base.hh
@@ -0,0 +1,58 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+
+
+/// \brief base for objects designed to perform work in a single pass over a position range
+///
+/// Progress through the range is reported to the worker via process_pos(), which
+/// derived classes implement. This base class is designed to link the worker
+/// object with the stage_manager object.
+///
+struct pos_processor_base
+{
+    pos_processor_base()
+        : _is_skip_process_pos(false) {}
+
+    virtual
+    ~pos_processor_base() {}
+
+    /// forward to process_pos() unless the skip flag has been set
+    void
+    check_process_pos(const int stage_no,
+                      const pos_t pos)
+    {
+        if (! _is_skip_process_pos)
+        {
+            process_pos(stage_no,pos);
+        }
+    }
+
+    /// work to be performed for position pos at stage stage_no
+    virtual
+    void
+    process_pos(const int stage_no,
+                const pos_t pos) = 0;
+
+protected:
+    /// when true, check_process_pos() does nothing
+    mutable bool _is_skip_process_pos;
+};
diff --git a/src/c++/lib/blt_util/pos_range.cpp b/src/c++/lib/blt_util/pos_range.cpp
new file mode 100644
index 0000000..f9791d7
--- /dev/null
+++ b/src/c++/lib/blt_util/pos_range.cpp
@@ -0,0 +1,55 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/pos_range.hh"
+
+#include <iostream>
+
+
+// Print the range as a 1-indexed inclusive interval, using "-inf"/"inf" for an
+// unset begin/end. The stored begin is zero-indexed, so it is shifted by one;
+// the stored end is printed unchanged.
+//
+std::ostream& operator<<(std::ostream& os, const pos_range& pr)
+{
+    os << "[";
+    if (! pr.is_begin_pos)
+    {
+        os << "-inf";
+    }
+    else
+    {
+        os << pr.begin_pos+1;
+    }
+
+    os << " .. ";
+
+    if (! pr.is_end_pos)
+    {
+        os << "inf";
+    }
+    else
+    {
+        os << pr.end_pos;
+    }
+    os << "]";
+
+    return os;
+}
diff --git a/src/c++/lib/blt_util/pos_range.hh b/src/c++/lib/blt_util/pos_range.hh
new file mode 100644
index 0000000..176dd22
--- /dev/null
+++ b/src/c++/lib/blt_util/pos_range.hh
@@ -0,0 +1,209 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+
+#include "boost/serialization/level.hpp"
+
+#include <algorithm>
+#include <iosfwd>
+
+
+/// \brief integer ranges which are potentially unbounded
+///
+/// Either end of the range may be unset, in which case the range is treated as
+/// unbounded on that side. The object handles representation, including
+/// intersection with positions and other ranges.
+///
+/// coding convention for all range '_pos' fields:
+/// XXX_begin_pos is the zero-indexed position at the beginning of the range
+/// XXX_end_pos is the zero-indexed position 1 step after the end of the range
+///
+/// any non-range pos value is assumed to be zero-indexed
+///
+struct pos_range
+{
+    pos_range() : is_begin_pos(false), is_end_pos(false), begin_pos(0), end_pos(0) {}
+
+    pos_range(const pos_t bp,const pos_t ep)
+        :  is_begin_pos(true), is_end_pos(true), begin_pos(bp), end_pos(ep) {}
+
+    /// reset to the default state, with neither end of the range set
+    void
+    clear()
+    {
+        is_begin_pos = is_end_pos = false;
+        begin_pos = end_pos = 0;
+    }
+
+    void
+    set_begin_pos(const pos_t pos)
+    {
+        is_begin_pos=true;
+        begin_pos=pos;
+    }
+
+    void
+    set_end_pos(const pos_t pos)
+    {
+        is_end_pos=true;
+        end_pos=pos;
+    }
+
+    void
+    set_range(const pos_t begin,
+              const pos_t end)
+    {
+        is_begin_pos=true;
+        begin_pos=begin;
+        is_end_pos=true;
+        end_pos=end;
+    }
+
+    /// true if neither end of the range has been set
+    bool
+    is_empty() const
+    {
+        return ((! is_begin_pos) && (! is_end_pos));
+    }
+
+    /// true if both ends of the range have been set
+    bool
+    is_complete() const
+    {
+        return (is_begin_pos && is_end_pos);
+    }
+
+    /// does pos fall within this range? an unset end imposes no limit on that side
+    inline
+    bool
+    is_pos_intersect(const pos_t pos) const
+    {
+        if (is_begin_pos && (pos < begin_pos)) return false;
+        if (is_end_pos && (pos >= end_pos)) return false;
+        return true;
+    }
+
+    /// do this range and pr share at least one position?
+    bool
+    is_range_intersect(const pos_range& pr) const
+    {
+        const bool isEndsBeforeThis(pr.is_end_pos && is_begin_pos && (pr.end_pos <= begin_pos));
+        const bool isBeginsAfterThis(pr.is_begin_pos && is_end_pos && (pr.begin_pos >= end_pos));
+        return (! (isEndsBeforeThis || isBeginsAfterThis));
+    }
+
+    /// does this range completely overlap pr?
+    bool
+    is_superset_of(const pos_range& pr) const
+    {
+        if (is_end_pos)
+        {
+            // pr must be bounded on this side and must not extend past this end
+            if ((! pr.is_end_pos) || (pr.end_pos > end_pos)) return false;
+        }
+        if (is_begin_pos)
+        {
+            // pr must be bounded on this side and must not start before this begin
+            if ((! pr.is_begin_pos) || (pr.begin_pos < begin_pos)) return false;
+        }
+        return true;
+    }
+
+    /// length of the range, or 0 if either end is unset or end precedes begin
+    unsigned
+    size() const
+    {
+        if (is_complete()) return std::max(0,end_pos-begin_pos);
+        return 0;
+    }
+
+    /// ordering on begin then end; on each side an unset position sorts
+    /// before any set position
+    bool
+    operator<(const pos_range& rhs) const
+    {
+        if (is_begin_pos != rhs.is_begin_pos) return rhs.is_begin_pos;
+        if (is_begin_pos && (begin_pos != rhs.begin_pos))
+        {
+            return (begin_pos < rhs.begin_pos);
+        }
+
+        // begin is tied, fall back to the end of the range
+        if (is_end_pos != rhs.is_end_pos) return rhs.is_end_pos;
+        return (is_end_pos && (end_pos < rhs.end_pos));
+    }
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& is_begin_pos;
+        ar& is_end_pos;
+        ar& begin_pos;
+        ar& end_pos;
+    }
+
+    bool is_begin_pos;
+    bool is_end_pos;
+    pos_t begin_pos;
+    pos_t end_pos;
+};
+
+
+/// \brief pos_range for bounded intervals only
+///
+/// Both ends are always set: construction requires them and clear() is hidden.
+///
+struct known_pos_range : public pos_range
+{
+    known_pos_range(const pos_t bp,const pos_t ep) : pos_range(bp,ep) {}
+
+    /// order by begin_pos, then by end_pos; the is_*_pos flags of rhs are not consulted
+    bool
+    operator<(const pos_range& rhs) const
+    {
+        if (begin_pos != rhs.begin_pos) return (begin_pos < rhs.begin_pos);
+        return (end_pos < rhs.end_pos);
+    }
+
+    /// compares positions only; the is_*_pos flags of rhs are not consulted
+    bool
+    operator==(const pos_range& rhs) const
+    {
+        if (begin_pos != rhs.begin_pos) return false;
+        return (end_pos == rhs.end_pos);
+    }
+
+    // expand range to the extent of a second range:
+    void
+    merge_range(const known_pos_range& kpr)
+    {
+        if (begin_pos>kpr.begin_pos) begin_pos=kpr.begin_pos;
+        if (end_pos<kpr.end_pos) end_pos=kpr.end_pos;
+    }
+
+    /// only the positions are archived; both ends are marked as set afterwards
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& begin_pos;
+        ar& end_pos;
+        is_begin_pos=true;
+        is_end_pos=true;
+    }
+
+private:
+    // hide the base class clear(), which would unset both ends
+    void clear();
+};
+
+
+std::ostream& operator<<(std::ostream& os, const pos_range& pr);
+
+BOOST_CLASS_IMPLEMENTATION(pos_range, boost::serialization::object_serializable)
+
diff --git a/src/c++/lib/blt_util/prob_util.cpp b/src/c++/lib/blt_util/prob_util.cpp
new file mode 100644
index 0000000..73c1a80
--- /dev/null
+++ b/src/c++/lib/blt_util/prob_util.cpp
@@ -0,0 +1,55 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#include "blt_util/log.hh"
+#include "blt_util/prob_util.hh"
+
+#include <cstdlib>
+
+#include <iomanip>
+#include <iostream>
+
+
+
+void
+check_ln_distro_invalid_value(const char* label,
+                              const double val,
+                              const unsigned n)
+{
+    log_os << std::setprecision(14) << std::fixed;
+    log_os << "ERROR: " << label << " element [" << n << "] has invalid value: '" << val << "'\n";
+    log_os.unsetf(std::ios::fixed);
+    exit(EXIT_FAILURE);
+}
+
+
+
+void
+check_ln_distro_invalid_sum(const char* label,
+                            const double sum)
+{
+    log_os << std::setprecision(14) << std::fixed;
+    log_os << "ERROR: " << label << " sum is: '" << sum << "'\n";
+    log_os.unsetf(std::ios::fixed);
+    exit(EXIT_FAILURE);
+}
diff --git a/src/c++/lib/blt_util/prob_util.hh b/src/c++/lib/blt_util/prob_util.hh
new file mode 100644
index 0000000..000d9c2
--- /dev/null
+++ b/src/c++/lib/blt_util/prob_util.hh
@@ -0,0 +1,346 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cassert>
+#include <cmath>
+
+#include <iterator>
+#include <type_traits>
+
+
+/// map a value on [-inf,inf] to a value on [0,1]
+///
+/// This is the simple binomial version of the 'soft-max' multinomial
+/// transformation:
+///
+/// out = exp(in) / (1 + exp(in))
+///
+/// TODO: probably some roundoff/precision changes that would improve handling of certain input ranges
+///
+template <typename FloatType>
+FloatType
+softMaxTransform(
+    const FloatType real)
+{
+    static_assert(std::is_floating_point<FloatType>::value, "Transform requires floating point type.");
+
+    // both branches exponentiate a non-positive value, so exp() cannot overflow
+    if (real<=0.)
+    {
+        const FloatType expReal(std::exp(real));
+        return (expReal/(1.+expReal));
+    }
+
+    // positive input: same ratio with numerator and denominator divided by exp(real)
+    const FloatType expNegReal(std::exp(-real));
+    return (1./(expNegReal+1.));
+}
+
+/// inverse of the single-value softMaxTransform: map a value on [0,1] back to [-inf,inf]
+///
+/// out = log(in / (1 - in))
+///
+/// the input range is asserted
+///
+template <typename FloatType>
+FloatType
+softMaxInverseTransform(
+    const FloatType ranged)
+{
+    static_assert(std::is_floating_point<FloatType>::value, "Transform requires floating point type.");
+    assert((ranged>=0) && (ranged<=1));
+
+    const FloatType complement(1-ranged);
+    if (ranged<=0.5)
+    {
+        return std::log(ranged/complement);
+    }
+
+    // upper half: take the log of the inverted ratio and negate it
+    return -std::log(complement/ranged);
+}
+
+/// softMaxTransform variant which maps to [rangedMin,rangedMax] instead of [0,1]:
+/// the [0,1] result is scaled by the width of the target range and shifted by rangedMin
+template <typename FloatType>
+FloatType
+softMaxTransform(
+    const FloatType real,
+    const FloatType rangedMin,
+    const FloatType rangedMax)
+{
+    const FloatType width(rangedMax-rangedMin);
+    const FloatType unitVal(softMaxTransform(real));
+    return unitVal*width + rangedMin;
+}
+
+/// softMaxInverseTransform variant for input on [rangedMin,rangedMax] instead of [0,1]:
+/// the input is shifted and scaled onto [0,1] before the inverse transform is applied
+template <typename FloatType>
+FloatType
+softMaxInverseTransform(
+    const FloatType ranged,
+    const FloatType rangedMin,
+    const FloatType rangedMax)
+{
+    const FloatType width(rangedMax-rangedMin);
+    const FloatType unitVal((ranged-rangedMin)/width);
+    return softMaxInverseTransform(unitVal);
+}
+
+
+
+/// Transform N values on [-inf,inf], in place, to the first N terms of an
+/// N+1 term probability distribution. The final (N+1th) term is implicit and
+/// does not represent a new degree of freedom; its value is returned by the
+/// function.
+///
+/// this uses basic soft-max multinomial transformation:
+///
+/// out_i = exp(in_i) / (1 + sum{i}{exp(in_i)})
+///
+/// TODO: probably some roundoff/precision changes that would improve handling of certain input ranges
+///
+template <typename IterType>
+typename std::iterator_traits<IterType>::value_type
+softMaxRangeTransform(
+    const IterType begin,
+    const IterType end)
+{
+    typedef typename std::iterator_traits<IterType>::value_type value_type;
+    static_assert(std::is_floating_point<value_type>::value, "Transform requires iterators over floating point type.");
+
+    // pass 1: exponentiate in place and accumulate the denominator,
+    // which starts at 1 to account for the implicit term
+    value_type denom(1);
+    for (IterType it(begin); it!=end; ++it)
+    {
+        const value_type expVal(std::exp(*it));
+        *it = expVal;
+        denom += expVal;
+    }
+
+    // pass 2: normalize and total the explicit terms
+    const value_type scale(1/denom);
+    value_type explicitTotal(0);
+    for (IterType it(begin); it!=end; ++it)
+    {
+        *it *= scale;
+        explicitTotal += *it;
+    }
+    return (1.-explicitTotal);
+}
+
+/// inverse of softMaxRangeTransform: transform the explicit terms of a probability
+/// distribution, in place, back to values on [-inf,inf]
+///
+/// out_i = log(in_i / (1 - sum{i}{in_i}))
+///
+/// each input value and the input sum are asserted to be on [0,1]
+///
+template <typename IterType>
+void
+softMaxInverseRangeTransform(
+    const IterType begin,
+    const IterType end)
+{
+    typedef typename std::iterator_traits<IterType>::value_type value_type;
+    static_assert(std::is_floating_point<value_type>::value, "Transform requires iterators over floating point type.");
+
+    value_type explicitTotal(0);
+    for (IterType it(begin); it!=end; ++it)
+    {
+        assert((*it>=0) && (*it<=1));
+        explicitTotal += *it;
+    }
+    assert((explicitTotal>=0) && (explicitTotal<=1));
+
+    // (1 - total) is the value of the implicit final term
+    const value_type scale(1./(1-explicitTotal));
+    for (IterType it(begin); it!=end; ++it)
+    {
+        *it = std::log(*it * scale);
+    }
+}
+
+
+/// Find more accurate complement of probability distro:
+///
+/// Returns the sum of every element of [begin,end) except the one at index cgt.
+/// This is used instead of 1 - prob[cgt], which could lose significant
+/// precision to floating point artifact. Typically this becomes valuable
+/// as prob[cgt] approaches 1.
+///
+template <typename It>
+typename std::iterator_traits<It>::value_type
+prob_comp(It begin,
+          const It end,
+          const unsigned cgt)
+{
+    typedef typename std::iterator_traits<It>::value_type float_type;
+
+    float_type total(0.);
+    for (unsigned idx(0); begin!=end; ++begin,++idx)
+    {
+        if (idx != cgt)
+        {
+            total = total + *begin;
+        }
+    }
+    return total;
+}
+
+
+/// given a log() transformed distribution, transform it in place to a standard
+/// distro which sums to 1, set max_idx to the index of the most probable component
+///
+/// an empty range is left untouched, with max_idx set to 0
+template <typename It>
+void
+normalize_ln_distro(const It pbegin,
+                    const It pend,
+                    unsigned& max_idx)
+{
+    typedef typename std::iterator_traits<It>::value_type float_type;
+
+    max_idx=0;
+    if (pbegin==pend) return;
+
+    // find the largest log-value, the first occurrence wins on ties
+    float_type maxVal(*pbegin);
+    {
+        unsigned idx(1);
+        for (It p(pbegin+1); p!=pend; ++p,++idx)
+        {
+            if (! (*p > maxVal)) continue;
+            maxVal = *p;
+            max_idx = idx;
+        }
+    }
+
+    // exponentiate relative to the largest value to alleviate underflow problem
+    float_type total(0.);
+    for (It p(pbegin); p!=pend; ++p)
+    {
+        const float_type scaled(std::exp(*p-maxVal));
+        *p = scaled;
+        total += scaled;
+    }
+
+    // normalize:
+    const float_type norm(1./total);
+    for (It p(pbegin); p!=pend; ++p)
+    {
+        *p *= norm;
+    }
+}
+
+
+// optimized version of probability normalization
+//
+// Transforms a log() transformed distribution in place to a standard distro
+// which sums to 1, and sets max_idx to the index of the most probable component.
+//
+// values significantly less than opt-max will be treated as zero probability
+//
+// opt-max is the largest value found within the subset of the distribution
+// where the predicate iterator is true. pred_begin must provide one predicate
+// value per distribution element, and at least one of them must be true
+// (asserted).
+//
+// An element is zeroed only when it is more than norm_thresh below the overall
+// max AND either its predicate is false or it is more than opt_thresh below
+// opt-max.
+//
+template <typename It,typename It2>
+void
+opt_normalize_ln_distro(const It pbegin,
+                        const It pend,
+                        const It2 pred_begin,
+                        unsigned& max_idx)
+{
+    typedef typename std::iterator_traits<It>::value_type float_type;
+
+    max_idx=0;
+    if (pbegin==pend) return;
+
+    bool is_max(false), is_opt_max(false);
+    float_type max(0), opt_max(0);
+
+    unsigned i(0);
+    It2 pred(pred_begin);
+    for (It p(pbegin); p!=pend; ++p,++pred,++i)
+    {
+        if ((! is_max) || (*p > max))
+        {
+            max = *p;
+            max_idx = i;
+            is_max=true;
+        }
+        // track the largest predicate-true value; this must compare against opt_max,
+        // comparing against max (already >= *p at this point) could never update it
+        if (((! is_opt_max) || (*p > opt_max)) && *pred)
+        {
+            opt_max = *p;
+            is_opt_max=true;
+        }
+    }
+
+    assert(is_opt_max);
+
+    static const float_type norm_thresh(20);
+    static const float_type opt_thresh(5);
+
+    float_type sum(0.);
+    pred=(pred_begin);
+    for (It p(pbegin); p!=pend; ++p,++pred)
+    {
+        float_type mdiff(max-*p);
+        const bool is_mdiff_skip(mdiff>norm_thresh);
+        if (is_mdiff_skip)
+        {
+            // far below the overall max: keep only predicate-true values near opt-max
+            if (! *pred)
+            {
+                *p=0;
+                continue;
+            }
+            float_type optdiff(opt_max-*p);
+            if (optdiff>opt_thresh)
+            {
+                *p=0;
+                continue;
+            }
+        }
+        *p = std::exp(-mdiff);
+        sum += *p;
+    }
+
+    // normalize:
+    sum = 1./sum;
+    for (It p(pbegin); p!=pend; ++p)
+    {
+        *p *= sum;
+    }
+}
+
+
+void
+check_ln_distro_invalid_value(const char* label,
+                              const double val,
+                              const unsigned n);
+
+
+void
+check_ln_distro_invalid_sum(const char* label,
+                            const double sum);
+
+
+/// validate a log() transformed distribution
+///
+/// Each element is exponentiated and checked against [0,1], then the sum of the
+/// exponentiated values is checked against target to within tol. A failure is
+/// passed to check_ln_distro_invalid_value() or check_ln_distro_invalid_sum(),
+/// with elements numbered from 1.
+///
+/// \return the sum of the exponentiated values
+///
+template <typename It>
+double
+check_ln_distro(It i,
+                const It i_end,
+                const char* label,
+                const double tol = 0.00001,
+                const double target = 1)
+{
+    double total(0);
+    for (unsigned elementNo(1); i!=i_end; ++i,++elementNo)
+    {
+        const double val(std::exp(*i));
+        const bool isOutOfRange((val<0.) || (val>1.));
+        if (isOutOfRange)
+        {
+            check_ln_distro_invalid_value(label,val,elementNo);
+        }
+        total += val;
+    }
+
+    const double sumError(std::abs(total-target));
+    if (sumError > tol)
+    {
+        check_ln_distro_invalid_sum(label,total);
+    }
+    return total;
+}
diff --git a/src/c++/lib/blt_util/qscore.hh b/src/c++/lib/blt_util/qscore.hh
new file mode 100644
index 0000000..59154b8
--- /dev/null
+++ b/src/c++/lib/blt_util/qscore.hh
@@ -0,0 +1,124 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/qscore_cache.hh"
+
+#include <cmath>
+
+#include <algorithm>
+#include <limits>
+
+
+// convert a quality character to an integer quality value by subtracting 64 from its character code
+inline
+int
+char_to_qval(const char c)
+{
+    return (static_cast<int>(c)-64);
+}
+
+template <typename FloatType> // phred value of an error probability: -10*log10(prob)
+FloatType
+error_prob_to_phred(const FloatType prob)
+{
+    static const FloatType minlog10(static_cast<FloatType>(std::numeric_limits<FloatType>::min_exponent10)); // lower bound applied to log10(prob), so prob==0 still gives a finite result
+    return -10.*std::max(minlog10,std::log10(prob));
+}
+
+template <typename FloatType> // same conversion as error_prob_to_phred, but the input is already ln(prob)
+FloatType
+ln_error_prob_to_phred(const FloatType lnProb)
+{
+    static const FloatType minlog10(static_cast<FloatType>(std::numeric_limits<FloatType>::min_exponent10)); // lower bound applied to the log10 value
+    static const FloatType ln10(std::log(static_cast<FloatType>(10)));
+    return -10.*std::max(minlog10,lnProb/ln10); // lnProb/ln10 converts natural log to log10
+}
+
+
+template <typename FloatType> // integer phred score for an error probability
+int
+error_prob_to_qphred(const FloatType prob)
+{
+    return static_cast<int>(std::floor(error_prob_to_phred(prob)+0.5)); // floor(x+0.5): round to nearest integer
+}
+
+template <typename FloatType> // integer phred score for a natural-log error probability
+int
+ln_error_prob_to_qphred(const FloatType lnProb)
+{
+    return static_cast<int>(std::floor(ln_error_prob_to_phred(lnProb)+0.5)); // floor(x+0.5): round to nearest integer
+}
+
+
+inline
+double
+phred_to_error_prob(const double val) // error probability for a phred value: 10^(-val/10)
+{
+    return std::pow(10.,-val/10.);
+}
+
+inline
+double
+qphred_to_error_prob(const int qscore) // table lookup through qphred_cache instead of recomputing the value
+{
+    return qphred_cache::get_error_prob(qscore);
+}
+
+inline
+double
+qphred_to_ln_comp_error_prob(const int qscore) // table lookup through qphred_cache; presumably ln(1 - error prob) — confirm
+{
+    return qphred_cache::get_ln_comp_error_prob(qscore);
+}
+
+inline
+double
+qphred_to_ln_error_prob(const int qscore) // table lookup through qphred_cache of ln(error prob)
+{
+    return qphred_cache::get_ln_error_prob(qscore);
+}
+
+
+// combine a basecall phred value with the read's mapping phred value into
+// one error probability: (1-me)*be + me*0.75 (be/me defined below)
+inline
+double
+phred_to_mapped_error_prob(const double basecall_val,
+                           const double mapping_val)
+{
+    const double be(phred_to_error_prob(basecall_val)); // basecall error probability
+    const double me(phred_to_error_prob(mapping_val)); // mapping error probability
+    return ((1.-me)*be)+(me*0.75); // NOTE(review): 0.75 presumably is the chance a base from a mismapped read differs (3 of 4 bases) — confirm
+}
+
+inline
+int
+qphred_to_mapped_qphred(const int basecall_val,
+                        const int mapping_val) // table lookup through qphred_cache of the combined integer qscore
+{
+    return qphred_cache::get_mapped_qscore(basecall_val,mapping_val);
+}
+
diff --git a/src/c++/lib/blt_util/qscore_cache.cpp b/src/c++/lib/blt_util/qscore_cache.cpp
new file mode 100644
index 0000000..2530ec0
--- /dev/null
+++ b/src/c++/lib/blt_util/qscore_cache.cpp
@@ -0,0 +1,76 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "blt_util/math_util.hh"
+#include "blt_util/qscore.hh"
+#include "blt_util/qscore_cache.hh"
+
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+
+
+qphred_cache::
+qphred_cache() // fills every lookup table for qscores 0..MAX_QSCORE (and mapping scores 0..MAX_MAP)
+{
+    static const double q2lnp(-std::log(10.)/10.); // ln(error prob) contributed by one phred unit
+
+    for (int i(0); i<=MAX_QSCORE; ++i)
+    {
+        q2p[i] = phred_to_error_prob(static_cast<double>(i));
+        q2lncompe[i] = log1p_switch(-q2p[i]); // presumably ln(1-p); log1p_switch is not visible here — confirm
+        q2lne[i] = static_cast<double>(i)*q2lnp;
+        for (int j(0); j<=MAX_MAP; ++j)
+        {
+            mappedq[j][i] = error_prob_to_qphred(phred_to_mapped_error_prob(i,j)); // table is indexed [mapping][basecall]
+        }
+    }
+}
+
+
+
+void
+qphred_cache::
+invalid_qscore_error(const int qscore,
+                     const char* label) // label names the kind of score in the message
+{
+    std::stringstream oss;
+    oss << "ERROR: Attempting to lookup invalid " << label << " score: " << qscore;
+    throw blt_exception(oss.str().c_str()); // always throws; never returns normally
+}
+
+
+void
+qphred_cache::
+high_qscore_error(const int qscore,
+                  const char* label) // label names the kind of score in the message
+{
+    std::stringstream oss;
+    oss << "ERROR: Attempting to lookup " << label << " score " << qscore << " which exceeds the maximum cached " << label << " score of " <<  MAX_QSCORE;
+    throw blt_exception(oss.str().c_str()); // always throws; never returns normally
+}
+
diff --git a/src/c++/lib/blt_util/qscore_cache.hh b/src/c++/lib/blt_util/qscore_cache.hh
new file mode 100644
index 0000000..2d04449
--- /dev/null
+++ b/src/c++/lib/blt_util/qscore_cache.hh
@@ -0,0 +1,140 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+
+#include <array>
+
+
+/// singleton helper class for qscore.hh: lookup tables indexed by integer qscore
+struct qphred_cache
+{
+    static
+    double
+    get_error_prob(const int qscore) // q2p entry for qscore
+    {
+        return qc().get_error_prob_imp(qscore);
+    }
+
+    static
+    double
+    get_ln_comp_error_prob(const int qscore) // q2lncompe entry for qscore
+    {
+        return qc().get_lncompe_imp(qscore);
+    }
+
+    static
+    double
+    get_ln_error_prob(const int qscore) // q2lne entry for qscore
+    {
+        return qc().get_lne_imp(qscore);
+    }
+
+    static
+    int
+    get_mapped_qscore(const int basecall_qscore,
+                      const int mapping_qscore) // mappedq entry for the (mapping, basecall) pair
+    {
+        return qc().get_mapped_qscore_imp(basecall_qscore,mapping_qscore);
+    }
+
+    enum { MAX_QSCORE = 70, // largest basecall qscore held in the tables
+           MAX_MAP = 90 // mapping qscores above this are clamped to it on lookup
+         };
+
+    static
+    void
+    qscore_check(const int qscore,
+                 const char* label) // rejects qscores outside [0,MAX_QSCORE]; label names the score in the error
+    {
+        if (qscore < 0) invalid_qscore_error(qscore,label);
+        if (qscore > MAX_QSCORE) high_qscore_error(qscore,label);
+    }
+
+private:
+    qphred_cache();
+
+    static
+    const qphred_cache&
+    qc() // the single instance: a function-local static built on first call
+    {
+        static const qphred_cache qc;
+        return qc;
+    }
+
+    static void invalid_qscore_error(const int qscore, const char* label);
+    static void high_qscore_error(const int qscore, const char* label);
+
+    static
+    void
+    qscore_check_int(const int qscore) // qscore_check with the fixed label "phred"
+    {
+        static const char* label = "phred";
+        qscore_check(qscore,label);
+    }
+
+    double
+    get_error_prob_imp(const int qscore) const
+    {
+        qscore_check_int(qscore); // validate before indexing
+        return q2p[qscore];
+    }
+
+    double
+    get_lncompe_imp(const int qscore) const
+    {
+        qscore_check_int(qscore); // validate before indexing
+        return q2lncompe[qscore];
+    }
+
+    double
+    get_lne_imp(const int qscore) const
+    {
+        qscore_check_int(qscore); // validate before indexing
+        return q2lne[qscore];
+    }
+
+    int
+    get_mapped_qscore_imp(const int basecall_qscore,
+                          int mapping_qscore) const
+    {
+        static const char* label = "basecall quality";
+        qscore_check(basecall_qscore,label);
+        if (mapping_qscore<0) invalid_qscore_error(mapping_qscore,"mapping quality"); // runtime check (not assert) so NDEBUG builds cannot index mappedq with a negative value
+        if (mapping_qscore>MAX_MAP)
+        {
+            mapping_qscore=MAX_MAP; // high mapping qscores share the MAX_MAP row
+        }
+        return mappedq[mapping_qscore][basecall_qscore];
+    }
+
+    std::array<double,MAX_QSCORE+1> q2p; // filled by the constructor (defined in qscore_cache.cpp)
+    std::array<double,MAX_QSCORE+1> q2lncompe; // filled by the constructor
+    std::array<double,MAX_QSCORE+1> q2lne; // filled by the constructor
+    uint8_t mappedq[MAX_MAP+1][MAX_QSCORE+1]; // indexed [mapping qscore][basecall qscore]
+};
+
diff --git a/src/c++/lib/blt_util/qscore_snp.cpp b/src/c++/lib/blt_util/qscore_snp.cpp
new file mode 100644
index 0000000..b763419
--- /dev/null
+++ b/src/c++/lib/blt_util/qscore_snp.cpp
@@ -0,0 +1,47 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/qscore_snp.hh"
+#include "blt_util/qscore.hh"
+#include "blt_util/math_util.hh"
+
+
+
+qscore_snp::
+qscore_snp(
+    const double snp_prob) // precomputes the three per-qscore tables for qscores 0..MAX_QSCORE
+{
+    static const int MAX_QSCORE(qphred_cache::MAX_QSCORE);
+
+    const double comp_snp3(1.-(snp_prob/3.));
+
+    for (int i(0); i<=MAX_QSCORE; ++i)
+    {
+        const double qerr(phred_to_error_prob(static_cast<double>(i))); // plain basecall error probability for qscore i
+        _q2p[i] = (qerr * comp_snp3) + ((1-qerr) * snp_prob); // basecall error combined with snp_prob
+        _q2lncompe[i] = log1p_switch(-_q2p[i]); // presumably ln(1-_q2p[i]); log1p_switch is not visible here — confirm
+        _q2lne[i] = std::log(_q2p[i]);
+    }
+}
+
diff --git a/src/c++/lib/blt_util/qscore_snp.hh b/src/c++/lib/blt_util/qscore_snp.hh
new file mode 100644
index 0000000..574d09b
--- /dev/null
+++ b/src/c++/lib/blt_util/qscore_snp.hh
@@ -0,0 +1,65 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/qscore_cache.hh"
+
+
+/// this object provides a variation on regular qscores by incorporating SNP probability
+///
+/// SNV probability limits the error if we have to explain the difference
+/// between a read and the reference, however it should not be used in cases where the
+/// biological variation has already been accounted for in the comparison.
+/// Every lookup validates qscore with qphred_cache::qscore_check() before indexing its table.
+struct qscore_snp
+{
+    qscore_snp(const double snp_prob);
+
+    double
+    qphred_to_error_prob(const int qscore) const // _q2p entry for qscore
+    {
+        qphred_cache::qscore_check(qscore, "basecall quality");
+        return _q2p[qscore];
+    }
+
+    double
+    qphred_to_ln_comp_error_prob(const int qscore) const // _q2lncompe entry for qscore
+    {
+        qphred_cache::qscore_check(qscore, "basecall quality");
+        return _q2lncompe[qscore];
+    }
+
+    double
+    qphred_to_ln_error_prob(const int qscore) const // _q2lne entry for qscore
+    {
+        qphred_cache::qscore_check(qscore, "basecall quality");
+        return _q2lne[qscore];
+    }
+
+private:
+    double _q2p[qphred_cache::MAX_QSCORE+1]; // one entry per qscore in [0,MAX_QSCORE]
+    double _q2lncompe[qphred_cache::MAX_QSCORE+1]; // one entry per qscore in [0,MAX_QSCORE]
+    double _q2lne[qphred_cache::MAX_QSCORE+1]; // one entry per qscore in [0,MAX_QSCORE]
+};
diff --git a/src/c++/lib/blt_util/reference_contig_segment.hh b/src/c++/lib/blt_util/reference_contig_segment.hh
new file mode 100644
index 0000000..f7b3aec
--- /dev/null
+++ b/src/c++/lib/blt_util/reference_contig_segment.hh
@@ -0,0 +1,114 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+
+#include <string>
+
+
+/// Manages a partial reference sequence segment
+///
+/// This object holds the reference sequence specified by the current
+/// run's begin and end range, plus some padding on each side. To get
+/// this integrated into the current code as quickly as possible it
+/// currently exposes the internal string object holding the sequence
+/// data. When time allows this will be restricted so that a compressed
+/// internal object can be used.
+///
+struct reference_contig_segment
+{
+    reference_contig_segment()
+        : _offset(0)
+    {}
+
+    char
+    get_base(const pos_t pos) const // base at pos, or 'N' when pos is outside [_offset,end())
+    {
+        if (pos<_offset || pos>=end()) return 'N';
+        return _seq[pos-_offset];
+    }
+
+    void
+    get_substring(const pos_t pos,
+                  const pos_t length,
+                  std::string& substr) const // substr is overwritten with the bases of [pos,pos+length)
+    {
+
+        if (pos<_offset || (pos+length)>end())
+        {
+            //slow path (minority of calls):
+            substr.clear();
+            for (int i(0); i<length; ++i)
+            {
+                substr.push_back(get_base(pos+i)); // positions outside the segment come back as 'N'
+            }
+        }
+        else
+        {
+            //fast path
+            substr.assign(_seq,pos-_offset,length);
+        }
+    }
+
+    std::string& seq() // mutable access to the stored sequence string
+    {
+        return _seq;
+    }
+    const std::string& seq() const
+    {
+        return _seq;
+    }
+
+    pos_t
+    get_offset() const
+    {
+        return _offset;
+    }
+
+    void
+    set_offset(const pos_t offset)
+    {
+        _offset=offset;
+    }
+
+    pos_t
+    end() const // one past the last position covered: _offset + stored sequence length
+    {
+        return _offset+_seq.size();
+    }
+
+    void
+    clear() // resets the offset to 0 and empties the sequence
+    {
+        _offset=0;
+        _seq.clear();
+    }
+
+private:
+
+    pos_t _offset; // position of _seq[0]
+    std::string _seq;
+};
diff --git a/src/c++/lib/blt_util/seq_printer.cpp b/src/c++/lib/blt_util/seq_printer.cpp
new file mode 100644
index 0000000..429c66d
--- /dev/null
+++ b/src/c++/lib/blt_util/seq_printer.cpp
@@ -0,0 +1,55 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#include "blt_util/seq_printer.hh"
+
+#include <cassert>
+#include <cstring>
+
+#include <iostream>
+
+
+
+/// pretty print sequence in such a way that it's easy to locate position number
+/// (rows of 100 bases, each split into space-separated groups of 10; no trailing newline is written)
+void
+printSeq(
+    const char* seq,
+    std::ostream& os)
+{
+    static const unsigned rowSize(100); // bases per output row
+    static const unsigned sectionSize(10); // bases per space-separated group within a row
+
+    assert(NULL != seq);
+    const unsigned seqLen(strlen(seq));
+
+    for (unsigned i(0); i<seqLen; ++i)
+    {
+        if (i) // no separator before the first base
+        {
+            if      (0 == (i % rowSize))     os << '\n';
+            else if (0 == (i % sectionSize)) os << ' ';
+        }
+        os << seq[i];
+    }
+}
diff --git a/src/c++/lib/blt_util/seq_printer.hh b/src/c++/lib/blt_util/seq_printer.hh
new file mode 100644
index 0000000..acce8e5
--- /dev/null
+++ b/src/c++/lib/blt_util/seq_printer.hh
@@ -0,0 +1,45 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <iosfwd>
+#include <string>
+
+
+/// pretty print sequence in such a way that it's easy to locate position number
+///
+void
+printSeq(
+    const char* seq,
+    std::ostream& os);
+
+
+inline
+void
+printSeq(
+    const std::string& seq,
+    std::ostream& os) // std::string convenience overload; forwards seq.c_str() to the const char* version
+{
+    printSeq(seq.c_str(),os);
+}
diff --git a/src/c++/lib/blt_util/seq_util.cpp b/src/c++/lib/blt_util/seq_util.cpp
new file mode 100644
index 0000000..f5325b6
--- /dev/null
+++ b/src/c++/lib/blt_util/seq_util.cpp
@@ -0,0 +1,175 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+#include "blt_util/log.hh"
+#include "blt_util/seq_util.hh"
+
+#include <cassert>
+#include <cstdlib>
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+
+
+
+void
+base_error(const char* func, const char a) // func: name of the calling function, shown in the message
+{
+    log_os << "ERROR:: Invalid base in " << func << ".\n"
+           << "\t\tinvalid base (char): '" << a << "'\n"
+           << "\t\tinvalid base (int): " << static_cast<int>(a) << "\n";
+    exit(EXIT_FAILURE); // terminates the process; does not return
+}
+
+
+
+void
+id_to_base_error(const uint8_t i) // logs the invalid base id and terminates the process
+{
+    log_os << "ERROR:: Invalid id in id_to_base. id: " << static_cast<int>(i) << "\n"; // cast: a uint8_t would be streamed as a raw character, not a number
+    exit(EXIT_FAILURE);
+}
+
+
+
+bool
+is_valid_seq(const char* seq) // true when every character of the NUL-terminated seq passes is_valid_base
+{
+    assert(NULL != seq);
+
+    while (*seq !=  '\0')
+    {
+        if (! is_valid_base(*seq)) return false; // stop at the first invalid character
+        seq++;
+    }
+    return true; // also reached for an empty string
+}
+
+
+
+void
+standardize_ref_seq(const char* ref_seq_file,
+                    const char* chr_name,
+                    std::string& ref_seq,
+                    const pos_t offset) // ref_seq is rewritten in place; the other arguments only feed the error report
+{
+    const std::string::size_type ref_size(ref_seq.size());
+    for (std::string::size_type i(0); i<ref_size; ++i)
+    {
+        const char old_ref(ref_seq[i]);
+        char c(old_ref);
+        if (islower(static_cast<unsigned char>(c))) c = static_cast<char>(toupper(static_cast<unsigned char>(c))); // <cctype> functions are undefined for negative char values
+        if (! is_valid_base(c))
+        {
+            if (! is_iupac_base(c))
+            {
+                static const char def_chr_name[] = "first-sequence-in-file";
+                const char* seq_name(NULL != chr_name ? chr_name : def_chr_name); // chr_name may be NULL
+
+                log_os << "ERROR:: Unexpected character in reference sequence.\n";
+                log_os << "\treference_sequence_file: '" << ref_seq_file << "'\n";
+                log_os << "\tchromosome: '" << seq_name << "'\n";
+                log_os << "\tcharacter: '" << old_ref << "'\n";
+                log_os << "\tcharacter_decimal_index: " << static_cast<int>(old_ref) << "\n";
+                log_os << "\tcharacter_position_in_chromosome: " << (i+1+offset) << "\n"; // reported 1-based, shifted by offset
+                exit(EXIT_FAILURE);
+            }
+            c=elandize_base(c); // IUPAC code outside [ACGTN]: reduce to the ELAND alphabet
+        }
+        if (c != old_ref) ref_seq[i] = c; // write back only when the character changed
+    }
+}
+
+
+
+std::size_t
+get_ref_seq_known_size(const reference_contig_segment& ref,
+                       const pos_range pr) // counts the positions of ref inside pr whose base is not 'N'
+{
+    pos_t b(0);
+    pos_t end(ref.end());
+    if (pr.is_begin_pos && (pr.begin_pos>0)) b=pr.begin_pos; // start at 0 unless pr sets a positive begin
+    if (pr.is_end_pos && (pr.end_pos>0)) end=std::min(end,pr.end_pos); // never scan past ref.end()
+    std::size_t size(0);
+    for (; b<end; ++b)
+    {
+        if (ref.get_base(b) != 'N') size++; // get_base also returns 'N' for positions before ref's offset
+    }
+    return size;
+}
+
+
+
+void
+get_seq_repeat_unit(
+    const std::string& seq,
+    std::string& repeat_unit,
+    unsigned& repeat_count) // outputs: repeat_unit and repeat_count
+{
+    const std::string::size_type sg(seq.find('-'));
+    const unsigned seq_size((sg!=std::string::npos) ? sg : seq.size()); // only the part before the first '-' is analysed
+
+    // check all divisors of seq_size until a repeat is found:
+    for (unsigned i(1); i<seq_size; ++i)
+    {
+        /// TODO -- find a real way to get the divisor list, this
+        /// isn't very important because indels are so small it
+        /// almost doesn't matter.
+        if ((seq_size%i) != 0) continue;
+
+        bool is_repeat(true);
+        for (unsigned j(i); j<seq_size; j += i) // compare each later i-sized chunk with the first chunk
+        {
+            for (unsigned k(0); k<i; ++k)
+            {
+                if (seq[j+k] != seq[k])
+                {
+                    is_repeat=false;
+                    break;
+                }
+            }
+            if (! is_repeat) break;
+        }
+        if (is_repeat)
+        {
+            repeat_unit = seq.substr(0,i); // i grows from 1, so the first hit is the shortest unit
+            repeat_count = seq_size/i;
+            return;
+        }
+    }
+
+    repeat_unit = seq; // NOTE(review): no repeat found; the whole seq is returned, including anything from the first '-' onward — confirm intended
+    repeat_count = 1;
+}
+
+
+
+void
+get_vcf_seq_repeat_unit(
+    const std::string& seq,
+    std::string& repeat_unit,
+    unsigned& repeat_count) // outputs: repeat_unit and repeat_count
+{
+    assert(! seq.empty()); // substr(1) below needs at least one character
+    get_seq_repeat_unit(seq.substr(1),repeat_unit,repeat_count); // drop the first base, then search as usual
+}
diff --git a/src/c++/lib/blt_util/seq_util.hh b/src/c++/lib/blt_util/seq_util.hh
new file mode 100644
index 0000000..1c2378c
--- /dev/null
+++ b/src/c++/lib/blt_util/seq_util.hh
@@ -0,0 +1,324 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+#include "blt_util/pos_range.hh"
+#include "blt_util/reference_contig_segment.hh"
+
+#include <cstring>
+
+#include <iterator>
+#include <string>
+
+
+namespace BASE_ID
+{
+enum index_t
+{
+    A,
+    C,
+    G,
+    T,
+    ANY, // the id base_to_id() returns for 'N'
+    SIZE // number of ids listed above
+};
+}
+
+enum { N_BASE=4 };
+
+void
+base_error(const char* func,
+           const char a);
+
+inline
+uint8_t
+base_to_id(const char a) // maps 'A','C','G','T','N' to the matching BASE_ID value
+{
+    using namespace BASE_ID;
+    switch (a)
+    {
+    case 'A':
+        return A;
+    case 'C':
+        return C;
+    case 'G':
+        return G;
+    case 'T':
+        return T;
+    case 'N':
+        return ANY;
+    default:
+        base_error("base_to_id",a); // any other character (lower case included) is reported as an error
+        return 4; // same value as BASE_ID::ANY; only reached if base_error() returns
+    }
+}
+
+void
+id_to_base_error(const uint8_t i);
+
+inline
+char
+id_to_base(const uint8_t i) // inverse of base_to_id: ids 0..4 map to 'A','C','G','T','N'
+{
+    static const char base[] = "ACGTN";
+
+    if (i>N_BASE) id_to_base_error(i); // N_BASE is 4, so index 4 ('N') is still valid
+    return base[i];
+}
+
+
+
+/// valid in the ELAND sense [ACGTN] (upper case only)
+inline
+bool
+is_valid_base(char a)
+{
+    switch (a)
+    {
+    case 'A':
+    case 'C':
+    case 'G':
+    case 'T':
+    case 'N':
+        return true;
+    default :
+        return false;
+    }
+}
+
+inline
+bool
+is_iupac_base(char a) // true for the upper-case codes listed below, plus '.' and '-'
+{
+    switch (a)
+    {
+    case 'A':
+    case 'C':
+    case 'G':
+    case 'U':
+    case 'T':
+    case 'R':
+    case 'Y':
+    case 'S':
+    case 'W':
+    case 'K':
+    case 'M':
+    case 'B':
+    case 'D':
+    case 'H':
+    case 'V':
+    case '.':
+    case '-':
+    case 'N':
+        return true;
+    default :
+        return false; // lower-case letters are not accepted
+    }
+}
+
+/// valid in the ELAND sense [ACGTN]
+bool
+is_valid_seq(const char* seq);
+
+inline
+char
+elandize_base(char a) // reduces a character accepted by is_iupac_base to the [ACGTN] alphabet
+{
+    switch (a)
+    {
+    case 'A':
+        return 'A';
+    case 'C':
+        return 'C';
+    case 'G':
+        return 'G';
+    case 'U':
+    case 'T':
+        return 'T'; // 'U' is treated as 'T'
+    case 'R':
+    case 'Y':
+    case 'S':
+    case 'W':
+    case 'K':
+    case 'M':
+    case 'B':
+    case 'D':
+    case 'H':
+    case 'V':
+    case '.':
+    case '-':
+    case 'N':
+        return 'N'; // every remaining accepted code, '.' and '-' included, becomes 'N'
+    default:
+        base_error("elandize_base",a);
+        return 'N'; // only reached if base_error() returns
+    }
+}
+
+inline
+char
+comp_base(char a) // complement of an upper-case base: A<->T, C<->G, N stays N
+{
+    switch (a)
+    {
+    case 'A':
+        return 'T';
+    case 'C':
+        return 'G';
+    case 'G':
+        return 'C';
+    case 'T':
+        return 'A';
+    case 'N':
+        return 'N';
+    default:
+        base_error("comp_base",a); // any other character is reported as an error
+        return 'N'; // only reached if base_error() returns
+    }
+}
+
+inline
+char
+get_seq_base(const char* seq,
+             const pos_t size,
+             const pos_t pos) // seq[pos], or 'N' when pos is outside [0,size)
+{
+    if ((pos<0) || (pos>=size))
+    {
+        return 'N';
+    }
+    else
+    {
+        return seq[pos];
+    }
+}
+
+inline
+char
+get_seq_base(const std::string& seq,
+             const pos_t pos) // std::string overload; forwards to the char* version with seq.size()
+{
+    return get_seq_base(seq.c_str(),seq.size(),pos);
+}
+
+// generalized in-place revcomp -- requires bidirectional iterators
+// b walks forward and e walks backward; each pair is complemented and swapped
+template <typename Iter>
+void
+reverseComp(Iter b,Iter e)
+{
+    char t;
+    for (; b!=e; ++b)
+    {
+        if ((--e)==b) // odd length: the middle element is only complemented
+        {
+            *b=comp_base(*b);
+            break;
+        }
+        t=comp_base(*b);
+        *b=comp_base(*e);
+        *e=t;
+    }
+}
+
+// easy string version: reverse-complements seq in place
+inline
+void
+reverseCompStr(std::string& seq)
+{
+    reverseComp(seq.begin(),seq.end());
+}
+
+
+template <typename T> void fixCstring(T) {} // generic case: nothing to do for non-char* arguments
+inline void fixCstring(char* b) // char* case: writes a terminating NUL at b
+{
+    *b='\0';
+}
+
+
+// generalized copy revcomp -- requires bidirectional iterators
+// writes the reverse complement of [cb,ce) to b; the input range is not modified
+template <typename ConstIter,typename Iter>
+void
+reverseCompCopy(ConstIter cb,ConstIter ce,Iter b)
+{
+    while (cb!=ce)
+    {
+        *b++ = comp_base(*--ce); // read from the back, write forward
+    }
+    fixCstring(b); // NUL-terminates the output when b is a char*; a no-op for other iterator types
+}
+
+// easy char*->string version: returns the reverse complement of NUL-terminated str
+inline
+std::string
+reverseCompCopyCStr(const char* str)
+{
+    std::string result;
+    reverseCompCopy(str,str+strlen(str),
+                    std::back_insert_iterator<std::string>(result)); // appends to result as it goes
+    return result;
+}
+
+// easy string->string version: returns the reverse complement of seq, leaving seq unchanged
+inline
+std::string
+reverseCompCopyStr(const std::string& seq)
+{
+    std::string result;
+    reverseCompCopy(seq.begin(),seq.end(),
+                    std::back_insert_iterator<std::string>(result)); // appends to result as it goes
+    return result;
+}
+
+/// Standardize reference sequence to [ACGTN]. Fail when non-IUPAC
+/// character is found.
+void
+standardize_ref_seq(const char* ref_seq_file,
+                    const char* chr_name,
+                    std::string& ref_seq,
+                    const pos_t offset);
+
+std::size_t
+get_ref_seq_known_size(const reference_contig_segment& ref_seq,
+                       const pos_range pr);
+
+/// Looks for the smallest possible perfect repeat in seq
+///
+void
+get_seq_repeat_unit(
+    const std::string& seq,
+    std::string& repeat_unit,
+    unsigned& repeat_count);
+
+/// Same as above but removes first base from seq
+///
+void
+get_vcf_seq_repeat_unit(
+    const std::string& seq,
+    std::string& repeat_unit,
+    unsigned& repeat_count);
diff --git a/src/c++/lib/blt_util/set_util.hh b/src/c++/lib/blt_util/set_util.hh
new file mode 100644
index 0000000..48b2c86
--- /dev/null
+++ b/src/c++/lib/blt_util/set_util.hh
@@ -0,0 +1,60 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <set>
+
+
+/// implements B -= A (erases from B every element that compares equal to an element of A)
+template <typename T>
+void
+inplaceSetSubtract(
+    const std::set<T>& A,
+    std::set<T>& B)
+{
+    typedef typename std::set<T>::const_iterator scit;
+    scit ait(A.begin()), ait_end(A.end());
+    scit bit(B.begin()), bit_end(B.end());
+    while ( (bit != bit_end) && (ait != ait_end) ) // one forward pass over both sets in step
+    {
+        if (*ait < *bit)
+        {
+            ++ait;
+        }
+        else
+        {
+            if (*ait == *bit)
+            {
+                const scit blast(bit);
+                ++bit; // advance before erasing so bit does not refer to the erased element
+                B.erase(blast);
+            }
+            else
+            {
+                ++bit;
+            }
+        }
+    }
+}
diff --git a/src/c++/lib/blt_util/sig_handler.cpp b/src/c++/lib/blt_util/sig_handler.cpp
new file mode 100644
index 0000000..0af8d9b
--- /dev/null
+++ b/src/c++/lib/blt_util/sig_handler.cpp
@@ -0,0 +1,73 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/log.hh"
+#include "blt_util/sig_handler.hh"
+
+#include <cstdlib>
+#include <signal.h>
+
+#include <iostream>
+#include <string>
+
+
+// program name and command line stored by initialize_blt_signals() and
+// written to the log by blt_sig_handler():
+static std::string _progname;
+static std::string _cmdline;
+
+
+
+/// \brief Signal handler registered by initialize_blt_signals()
+///
+/// SIGTERM (and SIGINT, unless built with _WIN32 defined) is logged as an
+/// error together with the stored command line, after which the process
+/// exits with EXIT_FAILURE. Any other signal number is logged at INFO level
+/// and the handler returns.
+///
+/// NOTE(review): stream output and exit() are not on the POSIX list of
+/// async-signal-safe functions -- confirm this best-effort logging is
+/// acceptable for the callers of this library.
+static
+void
+blt_sig_handler (int sig)
+{
+    // message fragment for the signals that end the process; stays null for
+    // every other signal number
+    const char* fatalEvent(nullptr);
+    if (sig == SIGTERM)
+    {
+        fatalEvent = " received termination signal. cmdline: ";
+    }
+#ifndef _WIN32
+    else if (sig == SIGINT)
+    {
+        fatalEvent = " received interrupt signal. cmdline: ";
+    }
+#endif
+
+    if (fatalEvent == nullptr)
+    {
+        log_os << "INFO: " << _progname << " received signal no: " << sig << std::endl;
+        return;
+    }
+
+    log_os << "ERROR: " << _progname << fatalEvent << _cmdline << std::endl;
+    exit(EXIT_FAILURE);
+}
+
+
+
+/// \brief Store the program identity and register blt_sig_handler
+///
+/// The handler is registered for SIGTERM, and also for SIGINT unless built
+/// with _WIN32 defined.
+///
+/// \param progname program name written in each signal log message; a null pointer is stored as an empty string
+/// \param cmdline command line written in the SIGTERM/SIGINT log messages; a null pointer is stored as an empty string
+void
+initialize_blt_signals(const char* progname,
+                       const char* cmdline)
+{
+    // assigning a null pointer to std::string is undefined behavior, so map
+    // null to "" instead of passing it through:
+    _progname=((progname != nullptr) ? progname : "");
+    _cmdline=((cmdline != nullptr) ? cmdline : "");
+
+    signal(SIGTERM, blt_sig_handler);
+#ifndef _WIN32
+    signal(SIGINT, blt_sig_handler);
+#endif
+}
diff --git a/src/c++/lib/blt_util/sig_handler.hh b/src/c++/lib/blt_util/sig_handler.hh
new file mode 100644
index 0000000..1c5d060
--- /dev/null
+++ b/src/c++/lib/blt_util/sig_handler.hh
@@ -0,0 +1,31 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+/// \brief logs sigint/sigterm events (with cmdline):
+///
+/// Stores progname and cmdline for use in the log messages and registers a
+/// handler for SIGTERM, plus SIGINT unless built with _WIN32 defined. After
+/// logging either of these signals the handler exits with EXIT_FAILURE.
+///
+/// \param progname program name written in each signal log message
+/// \param cmdline command line written in the SIGTERM/SIGINT log messages
+///
+void
+initialize_blt_signals(const char* progname,
+                       const char* cmdline);
diff --git a/src/c++/lib/blt_util/stage_manager.cpp b/src/c++/lib/blt_util/stage_manager.cpp
new file mode 100644
index 0000000..7ee16ea
--- /dev/null
+++ b/src/c++/lib/blt_util/stage_manager.cpp
@@ -0,0 +1,371 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "blt_util/stage_manager.hh"
+
+#include <cstdlib>
+
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+
+
+
+/// Register stage `id`.
+///
+/// When is_parent is true the stage is placed parent_distance behind the
+/// already registered stage parent_id; otherwise it is placed at position 0
+/// and parent_id/parent_distance are not used.
+///
+/// Throws blt_exception if a parent is required but parent_id is unknown, or
+/// if `id` has been registered before.
+void
+stage_data::
+add_stage(const int id,
+          const int parent_id,
+          const unsigned parent_distance,
+          const bool is_parent)
+{
+    unsigned stagePos(0);
+    if (is_parent)
+    {
+        const idmap_t::iterator parentIter(_ids.find(parent_id));
+        if (parentIter == _ids.end())
+        {
+            std::ostringstream oss;
+            oss << "ERROR: stage_data.add_stage() parent_id " << parent_id << " does not exist\n";
+            throw blt_exception(oss.str().c_str());
+        }
+
+        // total distance from the root = parent's total + distance to parent
+        stagePos = parentIter->second;
+        stagePos += parent_distance;
+    }
+
+    const bool isNewId(_ids.insert(std::make_pair(id,stagePos)).second);
+    if (! isNewId)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: stage_data.add_stage() id " << id << " already exists\n";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    // _stage_pos is kept ordered by (position,id). A full re-sort per
+    // insertion is wasteful in general, but the number of stages is always
+    // expected to be very small:
+    _stage_pos.push_back(std::make_pair(stagePos,id));
+    std::sort(_stage_pos.begin(),_stage_pos.end());
+}
+
+
+
+/// Report the lookup of a stage id which was never added: always throws a
+/// blt_exception naming the requested id.
+void
+stage_data::
+unknown_id_error(const int id) const
+{
+    std::ostringstream msg;
+    msg << "ERROR: unknown stage_id requested: ";
+    msg << id;
+    msg << "\n";
+
+    const std::string text(msg.str());
+    throw blt_exception(text.c_str());
+}
+
+
+/// Debug output: writes a "stage_pos:" header followed by one
+/// "pos: <position> id: <stage id>" line per stage, in _stage_pos order.
+void
+stage_data::
+dump(std::ostream& os) const
+{
+    os << "stage_pos:\n";
+
+    const stage_pos_t::const_iterator stageEnd(_stage_pos.end());
+    for (stage_pos_t::const_iterator stageIter(_stage_pos.begin()); stageIter != stageEnd; ++stageIter)
+    {
+        os << "pos: " << stageIter->first;
+        os << " id: " << stageIter->second << "\n";
+    }
+}
+
+
+
+/// \brief Set up stage scheduling for the stages described by sdata
+///
+/// \param sdata stage layout; copied into the manager. Must describe at least one stage (asserted).
+/// \param report_range optional begin/end positions of the range to process; copied
+/// \param ppb receives a check_process_pos() call for every (stage id, position) that is triggered; stored by reference
+stage_manager::
+stage_manager(const stage_data& sdata,
+              const pos_range& report_range,
+              pos_processor_base& ppb)
+    : _sdata(sdata)
+    , _is_head_pos(false)
+    // the position members only become meaningful once a position has been
+    // submitted, but max_pos()/min_pos() are public and can be called before
+    // that, so start them at a defined value instead of leaving them
+    // uninitialized:
+    , _head_pos(0)
+    , _max_pos(0)
+    , _min_pos(0)
+    , _is_first_pos_set(false)
+    , _report_range(report_range)
+    , _ppb(ppb)
+    // point at the manager's own copy of the stage list, not the argument's:
+    , _stage_pos_ptr(&(_sdata.stage_pos()))
+    , _stage_size(_stage_pos_ptr->size())
+    , _is_minpos(_stage_size,0)
+    , _minpos(_stage_size,0)
+    , _is_any_minpos(false)
+{
+    assert(_stage_size>0);
+}
+
+/// Replace the stage layout with sdata, possibly while positions are already
+/// being processed.
+///
+/// The new layout must have the same number of stages as the current one and
+/// no stage may end up closer to the root than it was; both conditions are
+/// checked by assert only. Each new stage id is looked up in the current
+/// layout through get_stage_id_shift(), which reports ids the current layout
+/// does not contain.
+///
+/// For every stage whose distance changed while a head position exists, a
+/// minimum position is recorded; process_pos()/finish_process_pos() skip
+/// stage positions below that minimum.
+void
+stage_manager::
+revise_stage_data(const stage_data& sdata)
+{
+    // check to see if new sdata qualifies against the restrictions
+    // of data revision:
+    const stage_data::stage_pos_t& sp(sdata.stage_pos());
+    const unsigned sps(sp.size());
+
+    assert(sps==_stage_size);
+
+    for (unsigned i(0); i<sps; ++i)
+    {
+        const pos_t new_pos(sp[i].first);
+        const int new_id(sp[i].second);
+        const pos_t old_pos(_sdata.get_stage_id_shift(new_id));
+        assert(old_pos<=new_pos);
+    }
+    // passed!!
+
+    // map up any old minimum values, keyed by stage id because the slot
+    // index of a stage can differ between the old and the new ordering:
+    std::map<int,std::pair<bool,pos_t> > old_minpos;
+    for (unsigned i(0); i<sps; ++i)
+    {
+        const int old_id(_stage_pos_ptr->operator[](i).second);
+        old_minpos[old_id] = std::make_pair((_is_minpos[i] != 0),_minpos[i]);
+    }
+
+    // create new minimum values, slot i now describing stage sp[i]:
+    for (unsigned i(0); i<sps; ++i)
+    {
+        const pos_t new_pos(sp[i].first);
+        const int new_id(sp[i].second);
+        const pos_t old_pos(_sdata.get_stage_id_shift(new_id));
+        // start from whatever minimum this stage id carried before:
+        _is_minpos[i] = old_minpos[new_id].first;
+        _minpos[i] = old_minpos[new_id].second;
+        // nothing more to do for an unmoved stage, or before any head
+        // position exists:
+        if (old_pos==new_pos || (!_is_head_pos)) continue;
+        _is_minpos[i] = 1;
+        _is_any_minpos = 1;
+        // (_head_pos-old_pos) is the position this stage would have been
+        // signaled for next under the old distance -- presumably everything
+        // below it has already been processed; an existing larger minimum
+        // is kept
+        _minpos[i] = std::max(_minpos[i],_head_pos-old_pos);
+    }
+
+    // transfer new value in, and re-point at the stage list of the updated
+    // copy:
+    _sdata=sdata;
+    _stage_pos_ptr=&(_sdata.stage_pos());
+
+}
+
+
+/// Process any remaining positions and return to the initial state.
+///
+/// If at least one position was submitted and the report range has an end,
+/// every position from _max_pos+1 up to (not including) end_pos is
+/// processed. If no position was ever submitted but the report range has
+/// both a begin and an end, the whole [begin_pos,end_pos) range is processed.
+/// finish_process_pos() then runs, and the first-position and head-position
+/// flags are cleared.
+void
+stage_manager::
+reset()
+{
+    if (_report_range.is_end_pos)
+    {
+        const pos_t stopPos(_report_range.end_pos);
+        if (_is_first_pos_set)
+        {
+            // extend processing from the last submitted position to the end
+            // of the report range:
+            for (pos_t p(_max_pos+1); p<stopPos; ++p)
+            {
+                process_pos(p);
+            }
+        }
+        else if (_report_range.is_begin_pos)
+        {
+            // no data was ever read in this case, so the appropriate range
+            // of empty positions is processed for consistency:
+            _min_pos=_report_range.begin_pos;
+            for (pos_t p(_report_range.begin_pos); p<stopPos; ++p)
+            {
+                process_pos(p);
+            }
+        }
+    }
+
+    finish_process_pos();
+
+    // back to ground state:
+    _is_head_pos=false;
+    _is_first_pos_set=false;
+}
+
+
+
+/// Register that input information exists for position pos.
+///
+/// The first submitted position initializes _max_pos and _min_pos (the latter
+/// from the report range begin, when one is set) and processes every position
+/// from _min_pos through pos. Afterwards a position below _min_pos lowers
+/// _min_pos, and a position above _max_pos advances processing to pos.
+void
+stage_manager::
+handle_new_pos_value(const pos_t pos)
+{
+    if (! _is_first_pos_set)
+    {
+        _max_pos = pos;
+        if (_report_range.is_begin_pos)
+        {
+            _min_pos = _report_range.begin_pos;
+        }
+        else
+        {
+            _min_pos = pos;
+        }
+
+        pos_t p(_min_pos);
+        while (p<=pos)
+        {
+            process_pos(p);
+            ++p;
+        }
+        _is_first_pos_set = true;
+    }
+
+    if (pos < _min_pos)
+    {
+        _min_pos = pos;
+    }
+
+    // nothing to advance unless pos is a new maximum:
+    if (! (pos > _max_pos)) return;
+
+    if (! _is_head_pos)
+    {
+        _head_pos = _max_pos+1;
+        _is_head_pos = true;
+    }
+
+    // a single call covers every position from _head_pos through pos:
+    process_pos(pos);
+    _max_pos = pos;
+}
+
+
+
+// New positional information has to fit into the buffer for its stage.
+//
+// Returns true when no position has been submitted yet, or when pos is
+// greater than _max_pos minus the distance of stage_id from the root stage.
+//
+bool
+stage_manager::
+is_new_pos_value_valid(const pos_t pos,
+                       const int stage_id)
+{
+    // the stage lookup comes first so that an unknown stage_id is reported
+    // even before any position has been set:
+    const pos_t stageShift(_sdata.get_stage_id_shift(stage_id));
+
+    return ((! _is_first_pos_set) || (pos > (_max_pos-stageShift)));
+}
+
+
+
+/// Throwing form of is_new_pos_value_valid(): if the check fails, a
+/// blt_exception is raised whose message lists pos+1, _max_pos+1 and
+/// stage_id.
+void
+stage_manager::
+validate_new_pos_value(const pos_t pos,
+                       const int stage_id)
+{
+    if (is_new_pos_value_valid(pos,stage_id)) return;
+
+    std::ostringstream oss;
+    oss << "ERROR:: reference sequence position difference too high for multi_stage_circular_buffer\n";
+    oss << "current position:\t" << (pos+1) << "\n";
+    oss << "top position for stage:\t" << (_max_pos+1) << "\n";
+    oss << "stage id:\t" << stage_id << "\n";
+    throw blt_exception(oss.str().c_str());
+}
+
+
+
+/// Return true if any of the first stage_size entries of minpos is non-zero.
+static
+bool
+get_is_any_minpos(const std::vector<uint8_t>& minpos,
+                  const unsigned stage_size)
+{
+    const std::vector<uint8_t>::const_iterator first(minpos.begin());
+    return std::any_of(first, first+stage_size,
+                       [](const uint8_t flag)
+    {
+        return (flag != 0);
+    });
+}
+
+
+
+/// Advance the head position to pos, signaling every stage along the way.
+///
+/// For each position p from _head_pos through pos the stages are visited in
+/// _stage_pos order and _ppb.check_process_pos(stage id, p - stage distance)
+/// is called for each of them. For a given p, visiting stops at the first
+/// stage whose position would fall below _min_pos. On return _head_pos is
+/// pos+1.
+///
+/// The two loops differ only in the per-stage minimum-position filter; the
+/// copy without the filter runs when _is_any_minpos is false on entry.
+void
+stage_manager::
+process_pos(const pos_t pos)
+{
+    // first call since construction/reset: start the head at pos itself
+    if (! _is_head_pos)
+    {
+        _head_pos = pos;
+        _is_head_pos = true;
+    }
+
+    if (_is_any_minpos)
+    {
+        for (pos_t p(_head_pos); p<=pos; ++p)
+        {
+            for (unsigned s(0); s<_stage_size; ++s)
+            {
+                const pos_t stage_pos(p-static_cast<pos_t>(_stage_pos_ptr->operator[](s).first));
+                // stages are sorted by increasing distance, so every later
+                // stage is below _min_pos as well:
+                if (stage_pos<_min_pos) break;
+                if (_is_minpos[s] != 0)
+                {
+                    // still below this stage's minimum: skip the position
+                    if (stage_pos<_minpos[s]) continue;
+                    // minimum reached: the restriction is lifted for good
+                    _is_minpos[s]=0;
+                }
+                _ppb.check_process_pos(_stage_pos_ptr->operator[](s).second,stage_pos);
+            }
+        }
+        // re-derive the summary flag from the per-stage flags:
+        _is_any_minpos=get_is_any_minpos(_is_minpos,_stage_size);
+    }
+    else
+    {
+        for (pos_t p(_head_pos); p<=pos; ++p)
+        {
+            for (unsigned s(0); s<_stage_size; ++s)
+            {
+                const pos_t stage_pos(p-static_cast<pos_t>(_stage_pos_ptr->operator[](s).first));
+                if (stage_pos<_min_pos) break;
+                _ppb.check_process_pos(_stage_pos_ptr->operator[](s).second,stage_pos);
+            }
+        }
+    }
+
+    _head_pos=pos+1;
+}
+
+
+
+/// Signal the stage positions which are still outstanding behind the current
+/// head position.
+///
+/// Does nothing if no head position has been set. Otherwise p counts upward
+/// from _head_pos without _head_pos itself being changed; for each p only
+/// stage positions below _head_pos (and not below _min_pos) are passed to
+/// _ppb.check_process_pos(). The scan ends once the last stage position
+/// computed for a given p has reached _head_pos.
+///
+/// As in process_pos(), the two loops differ only in the per-stage
+/// minimum-position filter.
+void
+stage_manager::
+finish_process_pos()
+{
+    if (! _is_head_pos) return;
+
+    if (_is_any_minpos)
+    {
+        for (pos_t p(_head_pos); true; ++p)
+        {
+            // declared outside the stage loop because the loop exit test
+            // below reads the value left by the last stage visited
+            pos_t stage_pos(p);
+            for (unsigned s(0); s<_stage_size; ++s)
+            {
+                stage_pos=(p-static_cast<pos_t>(_stage_pos_ptr->operator[](s).first));
+                // at or beyond the head: not an outstanding position for
+                // this stage
+                if (stage_pos>=_head_pos) continue;
+                if (stage_pos<_min_pos) break;
+                if (_is_minpos[s] != 0)
+                {
+                    if (stage_pos<_minpos[s]) continue;
+                    _is_minpos[s]=0;
+                }
+                _ppb.check_process_pos(_stage_pos_ptr->operator[](s).second,stage_pos);
+            }
+
+            if (stage_pos>=_head_pos) break;
+        }
+        _is_any_minpos=get_is_any_minpos(_is_minpos,_stage_size);
+    }
+    else
+    {
+        for (pos_t p(_head_pos); true; ++p)
+        {
+            pos_t stage_pos(p);
+            for (unsigned s(0); s<_stage_size; ++s)
+            {
+                stage_pos=(p-static_cast<pos_t>(_stage_pos_ptr->operator[](s).first));
+                if (stage_pos>=_head_pos) continue;
+                if (stage_pos<_min_pos) break;
+                _ppb.check_process_pos(_stage_pos_ptr->operator[](s).second,stage_pos);
+            }
+
+            if (stage_pos>=_head_pos) break;
+        }
+    }
+}
diff --git a/src/c++/lib/blt_util/stage_manager.hh b/src/c++/lib/blt_util/stage_manager.hh
new file mode 100644
index 0000000..4f8a9ee
--- /dev/null
+++ b/src/c++/lib/blt_util/stage_manager.hh
@@ -0,0 +1,252 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/pos_processor_base.hh"
+
+#include "blt_util/pos_range.hh"
+
+#include <cassert>
+
+#include <iosfwd>
+#include <map>
+#include <vector>
+
+
+/// \brief Stage layout handed to the stage_manager
+///
+/// Every stage is identified by an integer id.
+///
+/// The stages form a tree whose edge lengths are measured in bases of the
+/// reference. A root stage is expected to be defined first; each following
+/// stage names a parent stage and its distance to that parent.
+///
+/// Example: as used by the stage_manager, a two-stage tree whose stages are
+/// 100 apart means that the root stage is executed at the reference position
+/// pointer, while the second stage is executed at positions 100 bases behind
+/// the reference position pointer.
+///
+struct stage_data
+{
+    /// (total distance of the stage from the root stage, stage id)
+    using pos_stage_id = std::pair<unsigned,int>;
+    /// all stages, sorted by increasing distance and then stage id
+    using stage_pos_t = std::vector<pos_stage_id>;
+
+    /// \brief Add a "root" stage, i.e. a stage at distance zero without a parent
+    void
+    add_stage(const int id)
+    {
+        add_stage(id,0,0,false);
+    }
+
+    /// \brief Add a child stage which trails its parent by parent_distance
+    ///
+    /// parent_id must have been added already
+    ///
+    void
+    add_stage(const int id,
+              const int parent_id,
+              const unsigned parent_distance,
+              const bool is_parent=true);
+
+    /// The stage list through which the layout is summarized to the stage
+    /// manager
+    ///
+    const stage_pos_t&
+    stage_pos() const
+    {
+        return _stage_pos;
+    }
+
+    /// Total distance from the root stage for stage `id`; an id which was
+    /// never added is reported through unknown_id_error()
+    ///
+    unsigned
+    get_stage_id_shift(const int id) const
+    {
+        const idmap_t::const_iterator match(_ids.find(id));
+        if (match == _ids.end())
+        {
+            unknown_id_error(id);
+        }
+        return match->second;
+    }
+
+    /// debug output:
+    void
+    dump(std::ostream& os) const;
+
+private:
+    void
+    unknown_id_error(const int id) const;
+
+    /// stage id -> total distance from the root stage
+    using idmap_t = std::map<int,unsigned>;
+
+    idmap_t _ids;
+    stage_pos_t _stage_pos;
+};
+
+
+
+/// \brief help to manage information which is being gathered in an
+/// approximately sequential fashion and processed in sequence in
+/// multiple stages.
+///
+/// assumes that information related to each position will be
+/// available in an approximately sequential fashion, where all
+/// position values submitted after position X will be greater than
+/// (X - first_stage_buffer_size + 1). A violation of this assumption will
+/// trigger a runtime error.
+///
+/// range policy:
+///
+/// if begin_pos is not specified, then event processing and
+/// reporting start at the first pos >= 0 with position information
+/// submitted, else at begin_pos
+///
+/// if end_pos is not specified, then processing ends after last_pos
+/// with information submitted, else at end_pos.
+///
+struct stage_manager
+{
+    // stage_data structure is described above
+    //
+    // report_range is what it sounds like: the optional begin/end
+    // positions of the range to process
+    //
+    // pos_processor_base is where stage manager signals stage
+    // execution to (via check_process_pos)
+    //
+    stage_manager(const stage_data& sdata,
+                  const pos_range& report_range,
+                  pos_processor_base& ppb);
+
+    // Process any remaining positions, then return to the initial
+    // state in which no position has been submitted
+    void
+    reset();
+
+    // Handle new pos value is used to indicate a possible advance of
+    // the head position -- this represents the position of the input
+    // information. Note that you must explicitly check whether pos is
+    // too low for any particular stage using the methods further
+    // below, handle_new_pos_value does not do this for you.
+    //
+    // When the head position is advanced, it triggers a series of
+    // stage processing steps along the way. The head position is the
+    // position from which the distances used in stage_data are
+    // measured.
+    //
+    // Example: extending the stage_data example above -- if
+    // stage_data has a root_stage (distance 0) and a child_stage
+    // (distance 100 from root), and the current head_position is
+    // 1000, then setting handle_new_pos_value(1002) would cause the
+    // following sequence of calls to pos_processor_base:
+    //
+    // check_process_pos(root_stage_id,1001);
+    // check_process_pos(child_stage_id,901);
+    // check_process_pos(root_stage_id,1002);
+    // check_process_pos(child_stage_id,902);
+    //
+    void
+    handle_new_pos_value(const pos_t pos);
+
+    // Return true if stage 'stage_id' has not been run on position
+    // yet (ie. pos value is not too low). Always true before the first
+    // position has been submitted.
+    //
+    bool
+    is_new_pos_value_valid(const pos_t pos,
+                           const int stage_id);
+
+    // Test as above, except that an exception is thrown if the
+    // position is too low.
+    //
+    void
+    validate_new_pos_value(const pos_t pos,
+                           const int stage_id);
+
+    // highest position submitted so far; only meaningful once
+    // is_first_pos_set() is true
+    pos_t
+    max_pos() const
+    {
+        return _max_pos;
+    }
+
+    // lowest position in play (report range begin or lowest submitted
+    // position); only meaningful once is_first_pos_set() is true
+    pos_t
+    min_pos() const
+    {
+        return _min_pos;
+    }
+
+    // true once handle_new_pos_value has been called since
+    // construction or the last reset()
+    bool
+    is_first_pos_set() const
+    {
+        return _is_first_pos_set;
+    }
+
+    // Revising stage data is very restricted -- the new sdata
+    // must have the same number of stages, with the same set
+    // of ids. Stages may only increase in length relative to their
+    // prior values.
+    //
+    void
+    revise_stage_data(const stage_data& sdata);
+
+    // the manager's own copy of the current stage layout
+    const stage_data&
+    get_stage_data() const
+    {
+        return _sdata;
+    }
+
+private:
+    // advances head position from its current value to pos,
+    // signaling all stage processing steps to pos_processor_base along
+    // the way:
+    //
+    void
+    process_pos(const pos_t pos);
+
+    // advances head position until all remaining stage processing is
+    // complete based on the current head position value.
+    //
+    void
+    finish_process_pos();
+
+    // NOTE: the constructor's initializer list follows this declaration
+    // order; keep the two in step when adding members
+
+    // private copy of the stage layout
+    stage_data _sdata;
+
+    // _head_pos is the next position to be processed by the root stage;
+    // it is only valid while _is_head_pos is true
+    bool _is_head_pos;
+    pos_t _head_pos;
+    // _max_pos/_min_pos are only valid while _is_first_pos_set is true
+    pos_t _max_pos;
+    pos_t _min_pos;
+    bool _is_first_pos_set;
+
+    const pos_range _report_range;
+    pos_processor_base& _ppb;
+
+    // points at the stage list inside _sdata
+    const stage_data::stage_pos_t* _stage_pos_ptr;
+    unsigned _stage_size;
+    // per-stage minimum positions set by revise_stage_data: while
+    // _is_minpos[s] is non-zero, stage s is not signaled for positions
+    // below _minpos[s]. _is_any_minpos summarizes the _is_minpos flags.
+    std::vector<uint8_t> _is_minpos; //faster lookup than vector<bool>
+    std::vector<pos_t> _minpos;
+    bool _is_any_minpos;
+};
+
diff --git a/src/c++/lib/blt_util/stat_util.cpp b/src/c++/lib/blt_util/stat_util.cpp
new file mode 100644
index 0000000..014cae8
--- /dev/null
+++ b/src/c++/lib/blt_util/stat_util.cpp
@@ -0,0 +1,65 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/stat_util.hh"
+
+#include "boost/math/distributions/chi_squared.hpp"
+
+
+
+/// \brief Test whether a chi-squared statistic is significant at level alpha
+///
+/// \param xsq observed chi-squared statistic; must be non-negative (asserted)
+/// \param df degrees of freedom; must be positive (asserted)
+/// \param alpha significance level the upper-tail probability is compared to
+/// \return true if the upper-tail probability of xsq under a chi-squared
+///         distribution with df degrees of freedom is less than alpha
+bool
+is_chi_sqr_reject(
+    const double xsq,
+    const unsigned df,
+    const double alpha)
+{
+    assert(xsq>=0);
+    assert(df>0);
+
+    const boost::math::chi_squared dist(df);
+
+    // request the upper tail directly: formed as (1 - cdf) it cancels to
+    // exactly zero as soon as the cdf rounds to 1, and zero compares below
+    // any positive alpha even when the true tail probability is larger
+    return (boost::math::cdf(boost::math::complement(dist,xsq)) < alpha);
+}
+
+
+
+/// Likelihood ratio test of a null model against an alternate model.
+///
+/// Returns false when df is zero or when the null log-likelihood is greater
+/// than the alternate one. Otherwise the statistic
+/// -2*(null_loghood-alt_loghood) is handed to is_chi_sqr_reject() together
+/// with df and alpha, and that result is returned.
+bool
+is_lrt_reject_null(
+    const double null_loghood,
+    const double alt_loghood,
+    const unsigned df,
+    const double alpha)
+{
+    const bool isTestable((df != 0) && (! (null_loghood>alt_loghood)));
+    if (! isTestable) return false;
+
+    const double lrtStat(-2.*(null_loghood-alt_loghood));
+    return is_chi_sqr_reject(lrtStat,df,alpha);
+}
diff --git a/src/c++/lib/blt_util/stat_util.hh b/src/c++/lib/blt_util/stat_util.hh
new file mode 100644
index 0000000..526d7e6
--- /dev/null
+++ b/src/c++/lib/blt_util/stat_util.hh
@@ -0,0 +1,38 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+/// \brief chi-squared significance test
+///
+/// \param xsq chi-squared statistic; asserted to be >= 0
+/// \param df degrees of freedom; asserted to be > 0
+/// \param alpha significance level
+/// \return true if the upper-tail probability of xsq under a chi-squared
+///         distribution with df degrees of freedom is less than alpha
+bool
+is_chi_sqr_reject(
+    const double xsq,
+    const unsigned df,
+    const double alpha);
+
+/// \brief likelihood ratio test of a null model against an alternate model
+///
+/// \param null_loghood log-likelihood of the null model
+/// \param alt_loghood log-likelihood of the alternate model
+/// \param df degrees of freedom of the test; zero always yields false
+/// \param alpha significance level
+/// \return false if df is zero or null_loghood is greater than alt_loghood;
+///         otherwise the result of is_chi_sqr_reject() for the statistic
+///         -2*(null_loghood-alt_loghood)
+bool
+is_lrt_reject_null(
+    const double null_loghood,
+    const double alt_loghood,
+    const unsigned df,
+    const double alpha);
diff --git a/src/c++/lib/blt_util/stream_stat.cpp b/src/c++/lib/blt_util/stream_stat.cpp
new file mode 100644
index 0000000..ead3cb6
--- /dev/null
+++ b/src/c++/lib/blt_util/stream_stat.cpp
@@ -0,0 +1,38 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "stream_stat.hh"
+
+#include <iostream>
+
+
+
+/// Write a one-line summary of ss (sample size, min, max, mean, sd and
+/// standard error) to os; no trailing newline is written.
+std::ostream&
+operator<<(std::ostream& os,const stream_stat& ss)
+{
+    os << "sample_size: " << ss.size();
+    os << " min: " << ss.min();
+    os << " max: " << ss.max();
+    os << " mean: " << ss.mean();
+    os << " sd: " << ss.sd();
+    os << " se: " << ss.stderror();
+
+    return os;
+}
diff --git a/src/c++/lib/blt_util/stream_stat.hh b/src/c++/lib/blt_util/stream_stat.hh
new file mode 100644
index 0000000..03d78d1
--- /dev/null
+++ b/src/c++/lib/blt_util/stream_stat.hh
@@ -0,0 +1,119 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cmath>
+
+#include <iosfwd>
+#include <limits>
+
+
+
+/// \brief Running summary statistics (count, min, max, mean, variance) for a
+/// stream of double values
+///
+/// derived From Tony Cox's IndelFinder code
+///
+/// TODO: there are 3 minor variants of stream_stat now... consolidate this logic via template/inheritance
+///
+struct stream_stat
+{
+    // Mean and standard deviation are accumulated in a single pass with
+    // the cancellation-friendly formulae on p.26 of
+    // Higham, Accuracy & Stability of Numerical Algorithms;
+    // the member names M_ and Q_ follow his
+    stream_stat()
+    {
+        reset();
+    }
+
+    /// forget every value added so far
+    void reset()
+    {
+        k_ = 0;
+        M_ = 0;
+        Q_ = 0;
+        min_ = 0;
+        max_ = 0;
+    }
+
+    /// fold one more observation into the statistics
+    void add(const double x)
+    {
+        const bool isFirst(k_==0);
+        ++k_;
+        if (isFirst || (x>max_)) max_=x;
+        if (isFirst || (x<min_)) min_=x;
+
+        // the Q update needs both the previous and the updated mean:
+        const double prevMean(M_);
+        M_ += (x-prevMean)/static_cast<double>(k_);
+        Q_ += (x-prevMean)*(x-M_);
+    }
+
+    /// number of values added since construction or the last reset()
+    int size() const
+    {
+        return k_;
+    }
+    bool empty() const
+    {
+        return (k_==0);
+    }
+
+    // min/max/mean are NaN until at least one value has been added
+    double min() const
+    {
+        if (empty()) return nan();
+        return min_;
+    }
+    double max() const
+    {
+        if (empty()) return nan();
+        return max_;
+    }
+    double mean() const
+    {
+        if (empty()) return nan();
+        return M_;
+    }
+    /// sample variance (divides by size-1); NaN for fewer than two values
+    double variance() const
+    {
+        if (k_<2) return nan();
+        return Q_/(static_cast<double>(k_-1));
+    }
+    double sd() const
+    {
+        return std::sqrt(variance());
+    }
+    double stderror() const
+    {
+        return sd()/std::sqrt(static_cast<double>(k_));
+    }
+
+private:
+    static
+    double nan()
+    {
+        return std::numeric_limits<double>::quiet_NaN();
+    }
+
+    double M_;   // running mean
+    double Q_;   // running sum of squared deviations from the mean
+    double max_;
+    double min_;
+    unsigned k_; // number of values added
+};
+
+
+/// write a one-line summary (sample_size, min, max, mean, sd, se) of ss to os
+std::ostream& operator<<(std::ostream& os,const stream_stat& ss);
+
diff --git a/src/c++/lib/blt_util/string_util.cpp b/src/c++/lib/blt_util/string_util.cpp
new file mode 100644
index 0000000..f2ac2df
--- /dev/null
+++ b/src/c++/lib/blt_util/string_util.cpp
@@ -0,0 +1,114 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+
+/// \author Chris Saunders
+///
+
+
+#include "string_util.hh"
+
+#include <cstring>
+
+#include <iostream>
+
+
+
+/// Split the c-string str at every occurrence of delimiter and store the
+/// pieces in v; previous content of v is discarded.
+///
+/// Empty pieces are kept, so v ends up with one more element than the number
+/// of delimiters found. A '\0' delimiter yields the whole of str as a single
+/// piece.
+void
+split_string(
+    const char* str,
+    const char delimiter,
+    std::vector<std::string>& v)
+{
+    v.clear();
+
+    const char* tokenBegin(str);
+    if (delimiter != '\0')
+    {
+        // strchr finds the terminating nul itself when asked for '\0',
+        // which is why that delimiter never enters the scan
+        for (const char* tokenEnd(strchr(tokenBegin,delimiter));
+             tokenEnd != nullptr;
+             tokenEnd = strchr(tokenBegin,delimiter))
+        {
+            v.emplace_back(tokenBegin,tokenEnd-tokenBegin);
+            tokenBegin = tokenEnd+1;
+        }
+    }
+
+    // whatever follows the last delimiter (possibly nothing) is the final piece
+    v.emplace_back(tokenBegin);
+}
+
+
+
+/// Split str in place: every delimiter in str is overwritten with '\0' and v
+/// receives a pointer to the start of each resulting piece (previous content
+/// of v is discarded). The pointers refer into str itself.
+///
+/// With a '\0' delimiter str is left untouched and v holds str only.
+void
+destructive_split_string(
+    char* str,
+    const char delimiter,
+    std::vector<const char*>& v)
+{
+    v.clear();
+
+    // the first piece always starts at str itself
+    v.push_back(str);
+    if (delimiter == '\0') return;
+
+    for (char* cut(strchr(str,delimiter)); cut != nullptr; cut = strchr(cut+1,delimiter))
+    {
+        // terminate the piece before the delimiter; the next piece starts
+        // right after it
+        *cut = '\0';
+        v.push_back(cut+1);
+    }
+}
+
+
+
+/// Split str at every occurrence of delimiter and store the pieces in v;
+/// previous content of v is discarded.
+///
+/// \param str text to split
+/// \param delimiter character separating the pieces
+/// \param v receives the pieces in order of appearance
+/// \param isSkipEmpty if true, pieces of length zero are left out of v; every
+///        non-empty piece is kept, including the one following the last
+///        delimiter
+void
+split_string(
+    const std::string& str,
+    const char delimiter,
+    std::vector<std::string>& v,
+    const bool isSkipEmpty)
+{
+    v.clear();
+
+    const size_t strSize(str.size());
+    size_t start(0);
+    while (true)
+    {
+        const size_t next(str.find(delimiter,start));
+
+        // the final piece runs to the end of the string. It must be judged
+        // by its own length: treating "no further delimiter" as "empty"
+        // would drop a non-empty last piece whenever isSkipEmpty is set
+        const size_t tokenEnd((next == std::string::npos) ? strSize : next);
+        if ((tokenEnd != start) || (! isSkipEmpty))
+        {
+            v.emplace_back(str,start,tokenEnd-start);
+        }
+        if (next == std::string::npos) return;
+        start = next+1;
+    }
+}
+
+
+
+/// Return true if any of the pieces obtained by splitting str at delimiter is
+/// exactly equal to needle. Empty pieces take part in the comparison, so an
+/// empty needle matches an empty piece.
+bool
+split_match(const std::string& str,
+            const char delimiter,
+            const char* needle)
+{
+    size_t tokenBegin(0);
+    size_t delimPos(0);
+    do
+    {
+        delimPos = str.find(delimiter,tokenBegin);
+
+        // for the last piece delimPos is npos, and compare() clamps the
+        // resulting length to the end of the string
+        if (str.compare(tokenBegin,delimPos-tokenBegin,needle) == 0) return true;
+
+        tokenBegin = delimPos+1;
+    }
+    while (delimPos != std::string::npos);
+
+    return false;
+}
diff --git a/src/c++/lib/blt_util/string_util.hh b/src/c++/lib/blt_util/string_util.hh
new file mode 100644
index 0000000..080f0d1
--- /dev/null
+++ b/src/c++/lib/blt_util/string_util.hh
@@ -0,0 +1,57 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+/// split str on delimiter into v (v is cleared first); empty tokens are kept
+void
+split_string(
+    const char* str,
+    const char delimiter,
+    std::vector<std::string>& v);
+
+/// insert nulls into str to create a vector of c-strs without new allocation
+void
+destructive_split_string(
+    char* str,
+    const char delimiter,
+    std::vector<const char*>& v);
+/// std::string variant of split_string; if isSkipEmpty is set, empty tokens are left out of v
+void
+split_string(
+    const std::string& str,
+    const char delimiter,
+    std::vector<std::string>& v,
+    const bool isSkipEmpty = false);
+
+/// check for exact match to pattern after delimiting str by delimiter
+bool
+split_match(
+    const std::string& str,
+    const char delimiter,
+    const char* needle);
+
diff --git a/src/c++/lib/blt_util/test/CMakeLists.txt b/src/c++/lib/blt_util/test/CMakeLists.txt
new file mode 100644
index 0000000..83c64d7
--- /dev/null
+++ b/src/c++/lib/blt_util/test/CMakeLists.txt
@@ -0,0 +1,28 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+include(${THIS_CXX_TEST_LIBRARY_CMAKE})
diff --git a/src/c++/lib/blt_util/test/CircularCounter_test.cpp b/src/c++/lib/blt_util/test/CircularCounter_test.cpp
new file mode 100644
index 0000000..f58187a
--- /dev/null
+++ b/src/c++/lib/blt_util/test/CircularCounter_test.cpp
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "CircularCounter.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_CircularCounter )
+
+
+BOOST_AUTO_TEST_CASE( test_CircularCounter1 )
+{
+    CircularCounter cc(3);
+    // a freshly constructed counter reports zero
+    BOOST_CHECK_EQUAL(cc.count(),0);
+    // one pushed 'true' raises the count to 1
+    cc.push(true);
+    BOOST_CHECK_EQUAL(cc.count(),1);
+    // count stays 1 after two 'false' pushes and is 0 after a third (presumably 3 is the window size -- TODO confirm)
+    cc.push(false);
+    cc.push(false);
+    BOOST_CHECK_EQUAL(cc.count(),1);
+    cc.push(false);
+    BOOST_CHECK_EQUAL(cc.count(),0);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/MedianDepthTracker_test.cpp b/src/c++/lib/blt_util/test/MedianDepthTracker_test.cpp
new file mode 100644
index 0000000..d48b335
--- /dev/null
+++ b/src/c++/lib/blt_util/test/MedianDepthTracker_test.cpp
@@ -0,0 +1,115 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "MedianDepthTracker.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_MedianDepthTracker )
+
+
+BOOST_AUTO_TEST_CASE( test_MDT0 )
+{
+    static const double eps(0.00001); // BOOST_REQUIRE_CLOSE tolerance (a percentage in Boost.Test)
+
+    MedianDepthTracker t;
+    // no observations have been added: the median is expected to be 0
+    BOOST_REQUIRE_CLOSE(t.getMedian(),0.0,eps);
+}
+
+BOOST_AUTO_TEST_CASE( test_MDT1 )
+{
+    static const double eps(0.00001);
+
+    MedianDepthTracker t;
+    // expected medians (2, then 2.5) equal those of {1,2,3} and {1,2,3,4}: the 0 obs looks excluded -- TODO confirm
+    t.addObs(0);
+    t.addObs(2);
+    t.addObs(1);
+    t.addObs(3);
+
+    BOOST_REQUIRE_CLOSE(t.getMedian(),2.,eps);
+
+    t.addObs(4);
+
+    BOOST_REQUIRE_CLOSE(t.getMedian(),2.5,eps);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_MDT2 )
+{
+    static const double eps(0.00001);
+
+    MedianDepthTracker t;
+
+    t.addObs(0);
+    t.addObs(2);
+    t.addObs(1);
+    t.addObs(3);
+
+    BOOST_REQUIRE_CLOSE(t.getMedian(),2.,eps);
+    // five more observations of 2 leave the median at 2
+    t.addObs(2);
+    t.addObs(2);
+    t.addObs(2);
+    t.addObs(2);
+    t.addObs(2);
+
+    BOOST_REQUIRE_CLOSE(t.getMedian(),2.,eps);
+    // nine observations of 1 then outnumber the rest and pull the median down to 1
+    t.addObs(1);
+    t.addObs(1);
+    t.addObs(1);
+    t.addObs(1);
+    t.addObs(1);
+    t.addObs(1);
+    t.addObs(1);
+    t.addObs(1);
+    t.addObs(1);
+
+    BOOST_REQUIRE_CLOSE(t.getMedian(),1.,eps);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_MDT3 )
+{
+    static const double eps(0.00001);
+
+    MedianDepthTracker t;
+    // five 1s interleaved with five 4s: the expected median 2.5 is the mean of the two middle values
+    t.addObs(1);
+    t.addObs(4);
+    t.addObs(1);
+    t.addObs(4);
+    t.addObs(1);
+    t.addObs(4);
+    t.addObs(1);
+    t.addObs(4);
+    t.addObs(1);
+    t.addObs(4);
+
+    BOOST_REQUIRE_CLOSE(t.getMedian(),2.5,eps);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/RangeMap_test.cpp b/src/c++/lib/blt_util/test/RangeMap_test.cpp
new file mode 100644
index 0000000..a25a0c1
--- /dev/null
+++ b/src/c++/lib/blt_util/test/RangeMap_test.cpp
@@ -0,0 +1,147 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/RangeMap.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_RangeMap )
+
+BOOST_AUTO_TEST_CASE( test_RangeMap )
+{
+    RangeMap<int,int> rm;
+    // populate three widely spaced keys
+    rm.getRef(2) = 12;
+    rm.getRef(3000) = 13;
+    rm.getRef(6000) = 15;
+    // erase the lowest key, overwrite the survivors, then add and erase a higher key
+    rm.erase(2);
+    rm.getRef(3000) = 3;
+    rm.getRef(9000) = 12;
+    rm.getRef(6000) = 2;
+    rm.erase(9000);
+    // the remaining keys must hold their most recently assigned values
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(3000), 3);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(6000), 2);
+}
+
+BOOST_AUTO_TEST_CASE( test_RangeMap2 )
+{
+    RangeMap<int,int> rm;
+    // insert keys in descending order
+    rm.getRef(10000) = 12;
+    rm.getRef(9000) = 13;
+    rm.getRef(7000) = 15;
+    // erasing the lowest key must not disturb the others
+    rm.erase(7000);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(9000), 13);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(10000), 12);
+    rm.getRef(3000) = 3;
+    rm.getRef(9000) = 12;
+    rm.getRef(6000) = 2;
+    rm.erase(9000);
+    // keys added below the earlier minimum and the untouched key 10000 must all be readable
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(3000), 3);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(6000), 2);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(10000), 12);
+}
+
+BOOST_AUTO_TEST_CASE( test_rangeMap3 )
+{
+    RangeMap<int,int> rm;
+    // increment three keys, erase them all, then increment the same keys again
+    rm.getRef(0) += 1;
+    rm.getRef(20) += 1;
+    rm.getRef(21) += 1;
+    rm.erase(0);
+    rm.erase(20);
+    rm.erase(21);
+    rm.getRef(0) += 1;
+    rm.getRef(20) += 1;
+    rm.getRef(21) += 1;
+    // a value of 1 (not 2) shows the erased entries restarted from zero
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(20), 1);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(21), 1);
+}
+
+BOOST_AUTO_TEST_CASE( test_rangeMap_eraseTo )
+{
+    RangeMap<int,int> rm;
+
+    rm.getRef(5) += 1;
+    rm.getRef(20) += 1;
+    rm.getRef(21) += 1;
+    // eraseTo a position below every key leaves the contents intact
+    rm.eraseTo(0);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(5), 1);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(21), 1);
+    // eraseTo a position above every key empties the map
+    rm.eraseTo(30);
+    BOOST_REQUIRE(rm.empty());
+
+    rm.getRef(5) += 1;
+    rm.getRef(20) += 1;
+    rm.getRef(21) += 1;
+    // eraseTo(20) must leave key 21 in place
+    rm.eraseTo(20);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(21), 1);
+
+    rm.getRef(10030) += 1;
+    rm.getRef(10032) += 1;
+    rm.getRef(10034) += 1;
+
+    rm.eraseTo(10030);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(10032), 1);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(10034), 1);
+    // after eraseTo(10030) only 10032 and 10034 remain: erasing both empties the map
+    rm.erase(10034);
+    BOOST_REQUIRE(!rm.empty());
+    rm.erase(10032);
+    BOOST_REQUIRE(rm.empty());
+}
+
+BOOST_AUTO_TEST_CASE( test_rangeMap_eraseTo2 )
+{
+    RangeMap<int,int> rm;
+
+    rm.getRef(5) += 1;
+    rm.getRef(6) += 1;
+    rm.getRef(7) += 1;
+
+    rm.eraseTo(5);
+    // eraseTo is inclusive of its argument: key 5 now yields the supplied default, 6 and 7 are kept
+    BOOST_REQUIRE_EQUAL(rm.getConstRefDefault(5,0), 0);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(6), 1);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(7), 1);
+}
+
+BOOST_AUTO_TEST_CASE( test_rangeMap_dataSizeBoundary )
+{
+    RangeMap<int,int> rm(8);
+    // presumably the ctor arg (8) is the initial storage size, making key 8 the first one past it -- TODO confirm in RangeMap.hh
+    rm.getRef(0) += 1;
+    rm.getRef(8) += 1;
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(0), 1);
+    BOOST_REQUIRE_EQUAL(rm.getConstRef(8), 1);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/RegionSum_test.cpp b/src/c++/lib/blt_util/test/RegionSum_test.cpp
new file mode 100644
index 0000000..6c2c0b2
--- /dev/null
+++ b/src/c++/lib/blt_util/test/RegionSum_test.cpp
@@ -0,0 +1,38 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "RegionSum.hh"
+
+BOOST_AUTO_TEST_SUITE( test_RegionSum )
+
+
+BOOST_AUTO_TEST_CASE( RegionSum_test )
+{
+    RegionSum<unsigned> rs;
+    rs.add(known_pos_range2(3,7),1u);
+    rs.add(known_pos_range2(4,5),2u);
+    // (4,5) lies inside (3,7), so the expected maximum is the sum of both values: 1+2
+    BOOST_REQUIRE_EQUAL(rs.maxVal(),3u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/RegionTracker_test.cpp b/src/c++/lib/blt_util/test/RegionTracker_test.cpp
new file mode 100644
index 0000000..8bee9e3
--- /dev/null
+++ b/src/c++/lib/blt_util/test/RegionTracker_test.cpp
@@ -0,0 +1,306 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "RegionTracker.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_RegionTracker )
+
+
+BOOST_AUTO_TEST_CASE( test_RegionTrackerSimple )
+{
+    // Simplest test: range (0,1) must cover position 0 but not position 1 (the end is exclusive)
+    RegionTracker rt;
+
+    rt.addRegion(known_pos_range2(0,1));
+    BOOST_REQUIRE(rt.isIntersectRegion(0));
+    BOOST_REQUIRE(! rt.isIntersectRegion(1));
+}
+
+
+BOOST_AUTO_TEST_CASE( test_RegionTrackerPosIntersect )
+{
+    // region overlap tests: rt.size() shows whether added ranges were merged
+    {
+        RegionTracker rt;
+
+        rt.addRegion(known_pos_range2(5,10));
+        rt.addRegion(known_pos_range2(2,3));
+        BOOST_REQUIRE_EQUAL(rt.size(),2);
+        BOOST_REQUIRE(  rt.isIntersectRegion(2));
+        BOOST_REQUIRE(! rt.isIntersectRegion(3));
+        BOOST_REQUIRE(! rt.isIntersectRegion(4));
+        BOOST_REQUIRE(  rt.isIntersectRegion(5));
+        // (3,7) abuts (2,3) and overlaps (5,10): everything merges into one region
+        rt.addRegion(known_pos_range2(3,7));
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    {
+        RegionTracker rt;
+        rt.addRegion(known_pos_range2(5,10));
+        rt.addRegion(known_pos_range2(2,3));
+        rt.addRegion(known_pos_range2(2,5));
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    // (2,4) extends (2,3) but stops short of (5,10): two regions remain
+    {
+        RegionTracker rt;
+        rt.addRegion(known_pos_range2(5,10));
+        rt.addRegion(known_pos_range2(2,3));
+        rt.addRegion(known_pos_range2(2,4));
+        BOOST_REQUIRE_EQUAL(rt.size(),2);
+        BOOST_REQUIRE(  rt.isIntersectRegion(3));
+    }
+    // (4,5) abuts (5,10) but not (2,3): two regions remain
+    {
+        RegionTracker rt;
+        rt.addRegion(known_pos_range2(5,10));
+        rt.addRegion(known_pos_range2(2,3));
+        rt.addRegion(known_pos_range2(4,5));
+        BOOST_REQUIRE_EQUAL(rt.size(),2);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    // a range nested inside an existing region does not add a new region
+    {
+        RegionTracker rt;
+        rt.addRegion(known_pos_range2(1,10));
+        rt.addRegion(known_pos_range2(4,5));
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    // a range enclosing an existing region absorbs it
+    {
+        RegionTracker rt;
+        rt.addRegion(known_pos_range2(4,5));
+        rt.addRegion(known_pos_range2(1,10));
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_RegionTrackerRegionIntersect )
+{
+    // range-vs-region overlap tests: queries falling in the gap between (2,3) and (5,10) must not intersect
+    {
+        RegionTracker rt;
+
+        rt.addRegion(known_pos_range2(5,10));
+        rt.addRegion(known_pos_range2(2,3));
+        BOOST_REQUIRE_EQUAL(rt.size(),2);
+        BOOST_REQUIRE(  rt.isIntersectRegion( known_pos_range2(2,10)));
+        BOOST_REQUIRE(! rt.isIntersectRegion( known_pos_range2(3,4)));
+        BOOST_REQUIRE(! rt.isIntersectRegion( known_pos_range2(4,4)));
+        BOOST_REQUIRE(  rt.isIntersectRegion( known_pos_range2(5,11)));
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_RegionTracker3 )
+{
+    // region remove tests
+    RegionTracker rt;
+
+    rt.addRegion(known_pos_range2(5,10));
+    rt.addRegion(known_pos_range2(2,3));
+    rt.addRegion(known_pos_range2(14,15));
+    rt.addRegion(known_pos_range2(24,25));
+    BOOST_REQUIRE_EQUAL(rt.size(),4);
+    // per the sizes below, removeToPos only drops regions lying wholly at or before the position; repeating it is a no-op
+    rt.removeToPos(2);
+    BOOST_REQUIRE_EQUAL(rt.size(),3);
+    rt.removeToPos(2);
+    BOOST_REQUIRE_EQUAL(rt.size(),3);
+    rt.removeToPos(6);
+    BOOST_REQUIRE_EQUAL(rt.size(),3);
+    rt.removeToPos(16);
+    BOOST_REQUIRE_EQUAL(rt.size(),1);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_RegionTrackerSubset )
+{
+    // region subset tests
+    RegionTracker rt;
+
+    rt.addRegion(known_pos_range2(5,10));
+    rt.addRegion(known_pos_range2(2,3));
+    rt.addRegion(known_pos_range2(14,15));
+    rt.addRegion(known_pos_range2(24,25));
+    BOOST_REQUIRE_EQUAL(rt.size(),4);
+    // only query ranges contained in a single tracked region count as subsets
+    BOOST_REQUIRE(rt.isSubsetOfRegion(known_pos_range2(5,10)));
+    BOOST_REQUIRE(rt.isSubsetOfRegion(known_pos_range2(6,7)));
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(4,10)));
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(5,11)));
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(0,1)));
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(30,31)));
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_RegionPayloadTracker )
+{
+    // Simplest test: a hit yields a truthy, dereferenceable handle to the payload (25); a miss tests false
+    RegionPayloadTracker<int> rt;
+
+    rt.addRegion(known_pos_range2(0,1),25);
+    auto val = rt.isIntersectRegion(0);
+    BOOST_REQUIRE(val);
+    BOOST_REQUIRE_EQUAL(*val,25);
+    BOOST_REQUIRE(! rt.isIntersectRegion(1));
+}
+
+BOOST_AUTO_TEST_CASE( test_RegionPayloadTracker2 )
+{
+    // region overlap tests; every range carries the same payload (5), so each addRegion is expected to succeed
+    {
+        RegionPayloadTracker<int> rt;
+
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(5,10),5));
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(2,3),5));
+        BOOST_REQUIRE_EQUAL(rt.size(),2);
+        BOOST_REQUIRE(  rt.isIntersectRegion(2));
+        BOOST_REQUIRE(! rt.isIntersectRegion(3));
+        BOOST_REQUIRE(! rt.isIntersectRegion(4));
+        BOOST_REQUIRE(  rt.isIntersectRegion(5));
+        // (3,7) abuts (2,3) and overlaps (5,10): everything merges into one region
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(3,7),5));
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    {
+        RegionPayloadTracker<int> rt;
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(5,10),5));
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(2,3),5));
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(2,5),5));
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    // (2,4) extends (2,3) but stops short of (5,10): two regions remain
+    {
+        RegionPayloadTracker<int> rt;
+        rt.addRegion(known_pos_range2(5,10),5);
+        rt.addRegion(known_pos_range2(2,3),5);
+        rt.addRegion(known_pos_range2(2,4),5);
+        BOOST_REQUIRE_EQUAL(rt.size(),2);
+        BOOST_REQUIRE(  rt.isIntersectRegion(3));
+    }
+    // (4,5) abuts (5,10) but not (2,3): two regions remain
+    {
+        RegionPayloadTracker<int> rt;
+        rt.addRegion(known_pos_range2(5,10),5);
+        rt.addRegion(known_pos_range2(2,3),5);
+        rt.addRegion(known_pos_range2(4,5),5);
+        BOOST_REQUIRE_EQUAL(rt.size(),2);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    // a range nested inside an existing region does not add a new region
+    {
+        RegionPayloadTracker<int> rt;
+        rt.addRegion(known_pos_range2(1,10),5);
+        rt.addRegion(known_pos_range2(4,5),5);
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+    // a range enclosing an existing region absorbs it
+    {
+        RegionPayloadTracker<int> rt;
+        rt.addRegion(known_pos_range2(4,5),5);
+        rt.addRegion(known_pos_range2(1,10),5);
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(  rt.isIntersectRegion(4));
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_RegionPayloadTracker3 )
+{
+    // region remove tests
+    RegionPayloadTracker<int> rt;
+
+    rt.addRegion(known_pos_range2(5,10),5);
+    rt.addRegion(known_pos_range2(2,3),5);
+    rt.addRegion(known_pos_range2(14,15),5);
+    rt.addRegion(known_pos_range2(24,25),5);
+    BOOST_REQUIRE_EQUAL(rt.size(),4);
+    // per the sizes below, removeToPos only drops regions lying wholly at or before the position; repeating it is a no-op
+    rt.removeToPos(2);
+    BOOST_REQUIRE_EQUAL(rt.size(),3);
+    rt.removeToPos(2);
+    BOOST_REQUIRE_EQUAL(rt.size(),3);
+    rt.removeToPos(6);
+    BOOST_REQUIRE_EQUAL(rt.size(),3);
+    rt.removeToPos(16);
+    BOOST_REQUIRE_EQUAL(rt.size(),1);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_RegionPayloadTracker4 )
+{
+    // payload conflict tests
+    {
+        RegionPayloadTracker<int> rt;
+        // an overlapping range with a different payload is rejected; merely adjacent ranges are accepted
+        BOOST_REQUIRE( rt.addRegion(known_pos_range2(5,10),5));
+        BOOST_REQUIRE(! rt.addRegion(known_pos_range2(8,14),4));
+        BOOST_REQUIRE_EQUAL(rt.size(),1);
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(10,14),4));
+        BOOST_REQUIRE(rt.addRegion(known_pos_range2(3,5),3));
+
+        BOOST_REQUIRE(  rt.isIntersectRegion(3));
+        BOOST_REQUIRE_EQUAL(  *rt.isIntersectRegion(3), 3);
+    }
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_PayloadRegionTrackerSubset )
+{
+    // region subset tests
+    RegionPayloadTracker<int> rt;
+
+    rt.addRegion(known_pos_range2(5,10),5);
+    rt.addRegion(known_pos_range2(2,3),4);
+    rt.addRegion(known_pos_range2(14,15),-1);
+    rt.addRegion(known_pos_range2(24,25),0);
+    BOOST_REQUIRE_EQUAL(rt.size(),4);
+    // a subset hit returns the payload of the enclosing region
+    const auto test1(rt.isSubsetOfRegion(known_pos_range2(5,10)));
+    BOOST_REQUIRE(test1);
+    BOOST_REQUIRE_EQUAL(*test1 , 5);
+
+    const auto test2(rt.isSubsetOfRegion(known_pos_range2(6,7)));
+    BOOST_REQUIRE(test2);
+    BOOST_REQUIRE_EQUAL(*test2 , 5);
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(4,10)));
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(5,11)));
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(0,1)));
+    BOOST_REQUIRE(! rt.isSubsetOfRegion(known_pos_range2(30,31)));
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/SampleVector_test.cpp b/src/c++/lib/blt_util/test/SampleVector_test.cpp
new file mode 100644
index 0000000..061ac78
--- /dev/null
+++ b/src/c++/lib/blt_util/test/SampleVector_test.cpp
@@ -0,0 +1,47 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "SampleVector.hh"
+
+#include <random>
+
+
+BOOST_AUTO_TEST_SUITE( test_SampleVector )
+
+
+BOOST_AUTO_TEST_CASE( test_SampleVector1 )
+{
+    std::mt19937 rngEngine(0);
+    SampleVector<int,std::mt19937> sv(2,rngEngine);
+    // smoke test only: push 100 values through a fixed-seed sampler; nothing is asserted
+    for (unsigned i(0); i<100; ++i)
+    {
+        sv.push(i);
+    }
+
+    // can't make this portable:
+    // BOOST_CHECK_EQUAL(sv.data()[0],34);
+    // BOOST_CHECK_EQUAL(sv.data()[1],70);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/SimpleAlignment_test.cpp b/src/c++/lib/blt_util/test/SimpleAlignment_test.cpp
new file mode 100644
index 0000000..510b51a
--- /dev/null
+++ b/src/c++/lib/blt_util/test/SimpleAlignment_test.cpp
@@ -0,0 +1,62 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/SimpleAlignment.hh"
+
+
+BOOST_AUTO_TEST_SUITE( SimpleAlignment_path )
+
+
+BOOST_AUTO_TEST_CASE( test_SimpleAlignment )
+{
+    using namespace ALIGNPATH;
+
+    SimpleAlignment al;
+
+    al.pos = 100;
+    cigar_to_apath("100M", al.path);
+    // a pure 100M match starting at pos 100 should give a range beginning at 100 and ending at 200
+    const known_pos_range2 testRange(matchifyEdgeSoftClipRefRange(al));
+
+    BOOST_REQUIRE_EQUAL(testRange.begin_pos(), 100);
+    BOOST_REQUIRE_EQUAL(testRange.end_pos(), 200);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SimpleAlignment2 )
+{
+    using namespace ALIGNPATH;
+
+    SimpleAlignment al;
+
+    al.pos = 100;
+    cigar_to_apath("10S50M10D40M10S", al.path);
+    // the 10-base soft clips on each edge widen the range: 100-10=90 and 100+(50+10+40)+10=210
+    const known_pos_range2 testRange(matchifyEdgeSoftClipRefRange(al));
+
+    BOOST_REQUIRE_EQUAL(testRange.begin_pos(), 90);
+    BOOST_REQUIRE_EQUAL(testRange.end_pos(), 210);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/SizeDistribution_test.cpp b/src/c++/lib/blt_util/test/SizeDistribution_test.cpp
new file mode 100644
index 0000000..dd5e974
--- /dev/null
+++ b/src/c++/lib/blt_util/test/SizeDistribution_test.cpp
@@ -0,0 +1,117 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+#include "boost/test/floating_point_comparison.hpp"
+
+#include "blt_util/SizeDistribution.cpp"
+
+
+
+BOOST_AUTO_TEST_SUITE( test_SizeDistribution )
+
+BOOST_AUTO_TEST_CASE( test_EmptySizeDistribution )
+{
+    SizeDistribution sd;
+    // with no observations both cdf and quantile are expected to return 0
+    BOOST_REQUIRE_EQUAL(sd.cdf(2),0);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.2),0);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SizeDistribution1 )
+{
+    SizeDistribution sd;
+
+    sd.addObservation(1);
+    sd.addObservation(2);
+    sd.addObservation(3);
+    sd.addObservation(4);
+    // per the expectations below: cdf(x) is the fraction of observations <= x, quantile(p) the smallest observation whose cdf reaches p
+    BOOST_REQUIRE_EQUAL(sd.cdf(0),0.);
+    BOOST_REQUIRE_EQUAL(sd.cdf(1),0.25);
+    BOOST_REQUIRE_EQUAL(sd.cdf(2),0.5);
+    BOOST_REQUIRE_EQUAL(sd.cdf(3),0.75);
+    BOOST_REQUIRE_EQUAL(sd.cdf(4),1);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.0),1);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.25),1);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.5),2);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.74),3);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.75),3);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.76),4);
+    BOOST_REQUIRE_EQUAL(sd.quantile(1.0),4);
+}
+
+BOOST_AUTO_TEST_CASE( test_SizeDistributionFilter )
+{
+    SizeDistribution sd;
+
+    sd.addObservation(1);
+    sd.addObservation(2);
+    sd.addObservation(3);
+    sd.addObservation(4);
+
+    sd.filterObservationsOverQuantile(0.5);
+    // filtering at the 0.5 quantile is expected to keep only observations 1 and 2
+    BOOST_REQUIRE_EQUAL(sd.totalObservations(),2u);
+
+    BOOST_REQUIRE_EQUAL(sd.cdf(0),0.);
+    BOOST_REQUIRE_EQUAL(sd.cdf(1),0.5);
+    BOOST_REQUIRE_EQUAL(sd.cdf(2),1);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.0),1);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.25),1);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.5),1);
+    BOOST_REQUIRE_EQUAL(sd.quantile(0.75),2);
+    BOOST_REQUIRE_EQUAL(sd.quantile(1.0),2);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SizeDistributionPdf )
+{
+    SizeDistribution sd;
+
+    sd.addObservation(1);
+    sd.addObservation(2);
+    sd.addObservation(3);
+    sd.addObservation(4);
+    sd.addObservation(5);
+    sd.addObservation(6);
+    sd.addObservation(7);
+    sd.addObservation(8);
+    sd.addObservation(9);
+    sd.addObservation(10);
+
+    static const float tol(0.0001);
+    BOOST_REQUIRE_CLOSE(sd.pdf(1),0.1f, tol);
+    BOOST_REQUIRE_CLOSE(sd.pdf(5),0.1f, tol);
+    BOOST_REQUIRE_CLOSE(sd.pdf(10),0.1f, tol);
+    // just outside the observed range [1,10] the expected pdf is 5/6 of the in-range value -- presumably smoothing inside pdf(), TODO confirm
+    static const float expect2(0.1*5./6.);
+    BOOST_REQUIRE_CLOSE(sd.pdf(0), expect2, tol);
+    BOOST_REQUIRE_CLOSE(sd.pdf(11), expect2, tol);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/align_path_test.cpp b/src/c++/lib/blt_util/test/align_path_test.cpp
new file mode 100644
index 0000000..b396eef
--- /dev/null
+++ b/src/c++/lib/blt_util/test/align_path_test.cpp
@@ -0,0 +1,222 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/align_path.hh"
+
+//#define DEBUG_AP_TEST
+
+#ifdef DEBUG_AP_TEST
+#include <iostream>
+namespace
+{
+std::ostream& log_os(std::cerr);
+}
+#endif
+
+
+
+BOOST_AUTO_TEST_SUITE( test_align_path )
+
+using namespace ALIGNPATH;
+
+
+static
+void
+test_single_cigar_conversion(const std::string& input)
+{
+    // round trip: CIGAR string -> alignment path -> CIGAR string must reproduce the input
+    path_t roundTripPath;
+    cigar_to_apath(input.c_str(),roundTripPath);
+    BOOST_REQUIRE_EQUAL(input,apath_to_cigar(roundTripPath));
+}
+
+BOOST_AUTO_TEST_CASE( test_align_path_cigar_conversion )
+{
+    // a five-segment CIGAR string must parse into five path segments
+    path_t apath;
+    cigar_to_apath("10I10M10D10M10S",apath);
+    BOOST_REQUIRE_EQUAL(apath.size(),5u);
+
+    // test round-trip:
+    test_single_cigar_conversion("10I10M2S20M2I10M10D10M");
+    test_single_cigar_conversion("");
+    test_single_cigar_conversion("10S");
+}
+
+
+BOOST_AUTO_TEST_CASE( test_align_path_ref_length )
+{
+    path_t apath;
+    cigar_to_apath("2I10M10D4I10M10N10M3S",apath);
+    BOOST_REQUIRE_EQUAL(apath_ref_length(apath),50u); // the M, D and N segments: 10+10+10+10+10
+}
+
+
+BOOST_AUTO_TEST_CASE( test_align_path_read_length )
+{
+    path_t apath;
+    cigar_to_apath("2I10M10D4I10M10N10M3S",apath);
+    BOOST_REQUIRE_EQUAL(apath_read_length(apath),39u); // the I, M and S segments: 2+10+4+10+10+3
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_apath_indel_count )
+{
+    path_t apath;
+    cigar_to_apath("2I10M10D4I10M1D10M10N10M3S",apath);
+    BOOST_REQUIRE_EQUAL(apath_indel_count(apath),3u); // 2I, 10D4I and 1D: the adjacent 10D4I pair apparently counts once
+}
+
+
+
+static
+void
+test_string_clean(const char* cigar, const char* expect)
+{
+    // run apath_cleaner on the parsed input and compare it against the parsed expectation
+    path_t apath;
+    cigar_to_apath(cigar,apath);
+    apath_cleaner(apath);
+    // the expectation is parsed too, so the comparison is path-to-path rather than string-to-string
+    path_t expect_path;
+    cigar_to_apath(expect,expect_path);
+
+#ifdef DEBUG_AP_TEST
+    log_os << "cleaned,expect: " << apath << " " << expect_path << "\n";
+#endif
+
+    BOOST_REQUIRE_EQUAL(apath,expect_path);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_align_path_cleaner )
+{
+
+    // test cases for apath_cleaner function: zero-length segments are dropped and M segments split by 1P are joined
+    test_string_clean("29M2I5M1D0M3I20M16S","29M2I5M1D3I20M16S");
+    test_string_clean("1M1P1M","2M");
+    test_string_clean("1M1D0M1I1M","1M1D1I1M");
+    //test_string_clean("0H1H1S0I1M1D0M1I1I1D1I1M1D1M","29M2I5M1D3I20M16S");
+}
+
+BOOST_AUTO_TEST_CASE( test_apath_clean_seqmatch )
+{
+    const std::string testCigar("10M1D10=2X10=1D1M1=1=1X1=1X");
+    ALIGNPATH::path_t testPath;
+    cigar_to_apath(testCigar.c_str(), testPath);
+    // '=' and 'X' runs should collapse into M segments, merging with a neighbouring M
+    apath_clean_seqmatch(testPath);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(testPath), "10M1D22M1D6M");
+}
+
+
+BOOST_AUTO_TEST_CASE( test_apath_add_seqmatch )
+{
+    static const std::string testRead("AABAXXXY");
+    static const std::string testRef ("AAAADXXXX");
+    // read and ref differ at read offset 2 ('B' vs 'A') and at the final base ('Y' vs 'X')
+    static const std::string testCigar("4M1D4M");
+    ALIGNPATH::path_t testPath;
+    cigar_to_apath(testCigar.c_str(), testPath);
+
+    apath_add_seqmatch(
+        testRead.begin(), testRead.end(),
+        testRef.begin(), testRef.end(),
+        testPath);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(testPath), "2=1X1=1D3=1X");
+}
+
+
+
+static
+std::string
+test_limit_case(
+    const std::string& cigar,
+    const bool isReverse,
+    const unsigned length)
+{
+    // helper: parse cigar, optionally reverse the segment order, then cap the
+    // path with apath_limit_ref_length and hand back the resulting CIGAR string
+    ALIGNPATH::path_t trimmed;
+    cigar_to_apath(cigar.c_str(), trimmed);
+    if (isReverse) std::reverse(trimmed.begin(),trimmed.end());
+    apath_limit_ref_length(length, trimmed);
+    const std::string trimmedCigar(apath_to_cigar(trimmed));
+    return trimmedCigar;
+}
+
+
+BOOST_AUTO_TEST_CASE( test_apath_limit_ref_length )
+{
+    static const std::string testCigar("2=1X1=4D3=1X");
+    BOOST_REQUIRE_EQUAL(test_limit_case(testCigar,false,1),"1=");
+    BOOST_REQUIRE_EQUAL(test_limit_case(testCigar,false,5),"2=1X1=1D");
+    BOOST_REQUIRE_EQUAL(test_limit_case(testCigar,false,100),testCigar);
+    // the same limits applied to the reversed segment order
+    BOOST_REQUIRE_EQUAL(test_limit_case(testCigar,true,1),"1X");
+    BOOST_REQUIRE_EQUAL(test_limit_case(testCigar,true,5),"1X3=1D");
+}
+
+
+static
+std::string
+test_read_limit_case(
+    const std::string& cigar,
+    const bool isReverse,
+    const unsigned start,
+    const unsigned end)
+{
+    // helper: parse cigar, optionally reverse the segment order, then restrict the
+    // path with apath_limit_read_length and hand back the resulting CIGAR string
+    ALIGNPATH::path_t windowed;
+    cigar_to_apath(cigar.c_str(), windowed);
+    if (isReverse) std::reverse(windowed.begin(),windowed.end());
+    apath_limit_read_length(start, end, windowed);
+    const std::string windowedCigar(apath_to_cigar(windowed));
+    return windowedCigar;
+}
+
+
+BOOST_AUTO_TEST_CASE( test_apath_limit_read_length )
+{
+    static const std::string testCigar("2=1X1=4I3=1X"); // per the expectations below, (start,end) is a half-open window of read positions
+    BOOST_REQUIRE_EQUAL(test_read_limit_case(testCigar,false,0,1),"1=");
+    BOOST_REQUIRE_EQUAL(test_read_limit_case(testCigar,false,0,5),"2=1X1=1I");
+    BOOST_REQUIRE_EQUAL(test_read_limit_case(testCigar,false,0,100),testCigar);
+    BOOST_REQUIRE_EQUAL(test_read_limit_case(testCigar,false,1,5),"1=1X1=1I");
+    BOOST_REQUIRE_EQUAL(test_read_limit_case(testCigar,false,2,5),"1X1=1I");
+    BOOST_REQUIRE_EQUAL(test_read_limit_case(testCigar,false,5,10),"3I2=");
+}
+
+BOOST_AUTO_TEST_CASE( test_apath_clip_trail )
+{
+    static const std::string testCigar("2S20M3S4H");
+    ALIGNPATH::path_t path;
+    cigar_to_apath(testCigar.c_str(), path);
+    // only the trailing 3S counts: the leading 2S and the final hard clip (4H) are excluded
+    BOOST_REQUIRE_EQUAL(apath_soft_clip_trail_size(path),3u);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/binomial_test_test.cpp b/src/c++/lib/blt_util/test/binomial_test_test.cpp
new file mode 100644
index 0000000..255ace4
--- /dev/null
+++ b/src/c++/lib/blt_util/test/binomial_test_test.cpp
@@ -0,0 +1,449 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/binomial_test.hh"
+
+template <class T, size_t N>
+static size_t carray_size(T (&)[N])
+{
+    return N;
+}
+
+BOOST_AUTO_TEST_SUITE( test_binomial_test )
+
+BOOST_AUTO_TEST_CASE( test_exact_binomial_pval )
+{
+    static const double tol(0.0001);
+
+    // these tests assert a match with corresponding
+    // R functions:
+    const double p(0.14);
+    const unsigned x(5);
+    const unsigned n(12);
+
+    BOOST_REQUIRE_CLOSE(get_binomial_twosided_exact_pval(p, x, n), 0.01807065, tol);
+
+    BOOST_REQUIRE_CLOSE(get_binomial_gte_n_success_exact_pval(p, x, n), 0.01807065, tol);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_simple_binomial_test )
+{
+    static const double alpha(0.01);
+    BOOST_REQUIRE(! is_reject_binomial_twosided(alpha,0.5,1,10));
+    BOOST_REQUIRE(  is_reject_binomial_twosided(alpha,0.5,10,100));
+
+    // run the counts high enough to hit the chi-sq switchpoint:
+    BOOST_REQUIRE(  is_reject_binomial_twosided(alpha,0.5,100,1000));
+    BOOST_REQUIRE(! is_reject_binomial_twosided(alpha,0.1,100,1000));
+
+    // tests to ensure that one-sided p-values from the exact test are
+    // working correctly.
+
+    static const double tol(0.0001);
+
+    {
+        //simple case
+        unsigned n(10);
+        unsigned x(1);
+        double p(0.5);
+
+        BOOST_REQUIRE_CLOSE(get_binomial_gte_n_success_exact_pval(p, x, n),0.9990234, tol);
+    }
+
+    //simple case
+    unsigned n(10);
+    unsigned x(5);
+    double p(0.5);
+
+    BOOST_REQUIRE_CLOSE(get_binomial_gte_n_success_exact_pval(p, x, n),0.623046875, tol);
+    BOOST_REQUIRE(! is_reject_binomial_gte_n_success_exact(0.05, p, x, n));
+    BOOST_REQUIRE(  is_reject_binomial_gte_n_success_exact(0.70, p, x, n));
+
+    // if x is 0, p-value should be 1
+    BOOST_REQUIRE_CLOSE(get_binomial_gte_n_success_exact_pval(p, 0, n), 1, tol);
+
+    // more relevant to the binomial probabilities and p-values
+    // observed in somatic indel data
+    n = 50;
+    x = 1;
+    p = 6.484e-5;
+
+    BOOST_REQUIRE_CLOSE(get_binomial_gte_n_success_exact_pval(p, x, n), 3.23685517e-3, tol);
+    BOOST_TEST_MESSAGE( "x " << x << "; n " << n << "; p" << p << " p-value is " << get_binomial_gte_n_success_exact_pval(p, x, n));
+    BOOST_REQUIRE(! is_reject_binomial_gte_n_success_exact(1e-9, p, x, n));
+    BOOST_REQUIRE(  is_reject_binomial_gte_n_success_exact(1e-2, p, x, n));
+
+    x = 4;
+    BOOST_REQUIRE_CLOSE(get_binomial_gte_n_success_exact_pval(p, x, n), 4.06096935e-12, tol);
+    BOOST_TEST_MESSAGE( "x " << x << "; n " << n << "; p" << p << " p-value is " << get_binomial_gte_n_success_exact_pval(p, x, n));
+    BOOST_REQUIRE(! is_reject_binomial_gte_n_success_exact(1e-13, p, x, n));
+    BOOST_REQUIRE(  is_reject_binomial_gte_n_success_exact(1e-9,  p, x, n));
+}
+
+BOOST_AUTO_TEST_CASE( test_binomial_pvalue_0_5_manyvals )
+{
+    /*
+     *  tests = 100
+     *  trials = round(200*runif(tests))
+     *  successes = round(trials*runif(tests))
+     *  d = data.frame(t=seq(1, tests), trials=trials, successes=successes)
+     *  d$p = 0.5
+     *  d = ddply(d, .(t), function(x) {
+     *      print(x)
+     *      x$pval = binom.test(x$successes, x$trials, x$p, alternative="two.sided")$p.value
+     *      return(x)
+     *  })
+     *  print(d)
+     */
+    //     trials successes   p         pval
+    double exampledata[][4] =
+    {
+        {    83,        42, 0.5, 1.000000e+00 },
+        {   104,        55, 0.5, 6.241435e-01 },
+        {    78,        38, 0.5, 9.099465e-01 },
+        {   160,         2, 0.5, 1.762708e-44 },
+        {   168,       110, 0.5, 7.370835e-05 },
+        {   123,        24, 0.5, 5.165950e-12 },
+        {    76,        63, 0.5, 5.044924e-09 },
+        {    95,        29, 0.5, 1.864019e-04 },
+        {    45,        29, 0.5, 7.245426e-02 },
+        {    57,        14, 0.5, 1.538890e-04 },
+        {    48,        45, 0.5, 1.312586e-10 },
+        {    93,        58, 0.5, 2.201859e-02 },
+        {    74,        40, 0.5, 5.613807e-01 },
+        {   193,        43, 0.5, 4.401184e-15 },
+        {    76,        13, 0.5, 5.044924e-09 },
+        {   167,        48, 0.5, 3.825608e-08 },
+        {   171,       104, 0.5, 5.745647e-03 },
+        {    97,        42, 0.5, 2.228777e-01 },
+        {    44,        23, 0.5, 8.803958e-01 },
+        {     2,         1, 0.5, 1.000000e+00 },
+        {    26,        25, 0.5, 8.046627e-07 },
+        {   199,        63, 0.5, 2.466848e-07 },
+        {   152,       148, 0.5, 7.692946e-39 },
+        {    50,        42, 0.5, 1.163556e-06 },
+        {    30,        24, 0.5, 1.430906e-03 },
+        {   173,       114, 0.5, 3.502538e-05 },
+        {    31,         1, 0.5, 2.980232e-08 },
+        {    29,        27, 0.5, 1.624227e-06 },
+        {    20,         6, 0.5, 1.153183e-01 },
+        {    92,         5, 0.5, 2.104348e-20 },
+        {   182,       177, 0.5, 5.284278e-46 },
+        {    14,         5, 0.5, 4.239502e-01 },
+        {   127,        46, 0.5, 2.417552e-03 },
+        {   112,        54, 0.5, 7.769648e-01 },
+        {   146,        53, 0.5, 1.172563e-03 },
+        {   157,        40, 0.5, 5.948252e-10 },
+        {   152,       105, 0.5, 2.899910e-06 },
+        {   196,       195, 0.5, 3.922989e-57 },
+        {    33,        12, 0.5, 1.627557e-01 },
+        {    23,        17, 0.5, 3.468966e-02 },
+        {   104,        44, 0.5, 1.409563e-01 },
+        {    91,        87, 0.5, 2.260483e-21 },
+        {    79,        26, 0.5, 3.183011e-03 },
+        {    16,        11, 0.5, 2.101135e-01 },
+        {   111,        63, 0.5, 1.836572e-01 },
+        {    43,        14, 0.5, 3.153950e-02 },
+        {   157,        97, 0.5, 3.921773e-03 },
+        {   131,        60, 0.5, 3.823543e-01 },
+        {   191,       111, 0.5, 2.968512e-02 },
+        {    99,        11, 0.5, 4.529580e-16 },
+        {    73,        58, 0.5, 4.093199e-07 },
+        {   157,       112, 0.5, 8.922398e-08 },
+        {    51,        17, 0.5, 2.409291e-02 },
+        {   106,        29, 0.5, 3.452561e-06 },
+        {    20,         6, 0.5, 1.153183e-01 },
+        {   187,        45, 0.5, 6.577780e-13 },
+        {   164,       149, 0.5, 6.266311e-29 },
+        {   126,        74, 0.5, 6.093825e-02 },
+        {   163,        88, 0.5, 3.472932e-01 },
+        {    97,        46, 0.5, 6.848588e-01 },
+        {    23,         5, 0.5, 1.062202e-02 },
+        {   126,        30, 0.5, 3.044627e-09 },
+        {   123,        42, 0.5, 5.560924e-04 },
+        {   187,        21, 0.5, 3.631339e-29 },
+        {   174,       108, 0.5, 1.800200e-03 },
+        {    11,         2, 0.5, 6.542969e-02 },
+        {    82,        77, 0.5, 1.204638e-17 },
+        {    11,         2, 0.5, 6.542969e-02 },
+        {   152,       138, 0.5, 8.462475e-27 },
+        {    20,        10, 0.5, 1.000000e+00 },
+        {   139,        56, 0.5, 2.707353e-02 },
+        {    84,        59, 0.5, 2.664511e-04 },
+        {    16,        15, 0.5, 5.187988e-04 },
+        {    87,        70, 0.5, 8.350916e-09 },
+        {   124,        55, 0.5, 2.429228e-01 },
+        {    52,        18, 0.5, 3.648340e-02 },
+        {   136,        86, 0.5, 2.558735e-03 },
+        {   191,        59, 0.5, 1.346152e-07 },
+        {   194,        91, 0.5, 4.297475e-01 },
+        {    84,        33, 0.5, 6.297226e-02 },
+        {    85,        84, 0.5, 4.446096e-24 },
+        {    72,        20, 0.5, 2.077160e-04 },
+        {   191,        90, 0.5, 4.694208e-01 },
+        {   176,       169, 0.5, 2.002554e-41 },
+        {   177,        38, 0.5, 9.936809e-15 },
+        {   112,        84, 0.5, 1.110700e-07 },
+        {    31,        26, 0.5, 1.921952e-04 },
+        {    41,        33, 0.5, 1.122214e-04 },
+        {   182,        25, 0.5, 1.407601e-24 },
+        {   170,         3, 0.5, 1.094465e-45 },
+        {   156,        81, 0.5, 6.890543e-01 },
+        {    11,         5, 0.5, 1.000000e+00 },
+        {   108,        12, 0.5, 1.957750e-17 },
+        {     6,         6, 0.5, 3.125000e-02 },
+        {   116,        58, 0.5, 1.000000e+00 },
+        {    85,        40, 0.5, 6.646455e-01 },
+        {   151,        32, 0.5, 5.591514e-13 },
+        {   170,       115, 0.5, 4.870266e-06 },
+        {   106,        10, 0.5, 8.742831e-19 },
+        {   176,       127, 0.5, 3.617000e-09 }
+    };
+
+    const size_t nexamples = carray_size(exampledata);
+    for (size_t i = 0; i < nexamples; ++i)
+    {
+        unsigned trials = (unsigned )exampledata[i][0];
+        unsigned successes = (unsigned )exampledata[i][1];
+        double p = exampledata[i][2];
+        static const double tol(0.01);
+        BOOST_CHECK_CLOSE(get_binomial_twosided_exact_pval(p, successes, trials), exampledata[i][3], tol);
+    }
+}
+
+BOOST_AUTO_TEST_CASE( test_binomial_pvalue_many_p_manyvals )
+{
+    /*
+     *  tests = 50
+     *  trials = round(200*runif(tests))
+     *  successes = round(trials*runif(tests))
+     *  p = runif(tests)
+     *  d = data.frame(t=seq(1, tests), trials=trials, successes=successes, p=p)
+     *  d = ddply(d, .(t), function(x) {
+     *      print(x)
+     *      x$pval = binom.test(x$successes, x$trials, x$p, alternative="two.sided")$p.value
+     *      return(x)
+     *  })
+     *  print(d)
+     */
+    //     trials successes   p         pval
+    double exampledata[][4] =
+    {
+        {   11,        3, 2.606046e-01,  1.000000e+00 },
+        {  119,       13, 7.146513e-01,  1.937354e-43 },
+        {   18,       15, 9.509081e-01,  5.558266e-02 },
+        {   37,        8, 7.942178e-01,  8.049322e-14 },
+        {  171,      140, 1.810806e-01,  3.046499e-73 },
+        {  153,      145, 2.606830e-02, 1.087423e-217 },
+        {  105,       95, 8.408730e-02,  8.555045e-90 },
+        {  169,      125, 1.841133e-01,  1.636360e-55 },
+        {  145,       40, 8.158598e-01,  2.012806e-45 },
+        {  167,       17, 3.623335e-01,  2.195455e-14 },
+        {   13,        3, 9.886147e-01,  1.014544e-17 },
+        {   94,       66, 7.904077e-01,  4.206094e-02 },
+        {   84,       70, 1.256048e-01,  4.249744e-49 },
+        {   87,        9, 7.920060e-01,  4.165875e-43 },
+        {  151,      137, 5.341560e-01,  2.910204e-23 },
+        {  131,        5, 5.032622e-01,  6.137486e-32 },
+        {   32,       29, 1.933186e-02,  9.393721e-47 },
+        {  154,       61, 4.758460e-02,  1.291923e-39 },
+        {   18,        1, 8.443087e-02,  1.000000e+00 },
+        {   99,       99, 5.012314e-02, 2.012726e-129 },
+        {  162,        4, 1.037136e-01,  2.449286e-04 },
+        {   48,       21, 8.339616e-01,  5.098986e-10 },
+        {  195,      136, 7.164857e-01,  5.781318e-01 },
+        {  115,       98, 5.117219e-01,  2.792499e-14 },
+        {  104,       77, 6.700700e-01,  1.442298e-01 },
+        {   68,       65, 3.900777e-01,  3.113912e-23 },
+        {  104,       41, 6.784836e-01,  2.870279e-09 },
+        {  198,       68, 8.416943e-01,  9.580940e-56 },
+        {  183,       17, 8.236956e-01, 1.083843e-103 },
+        {   15,       12, 8.408493e-01,  7.206539e-01 },
+        {   81,       36, 7.994870e-01,  2.023837e-12 },
+        {  187,       40, 4.973080e-01,  1.656070e-15 },
+        {   68,        0, 9.132837e-01,  6.177995e-73 },
+        {   65,       28, 7.287608e-01,  4.421443e-07 },
+        {  200,       64, 6.089847e-01,  2.115927e-16 },
+        {   81,       49, 9.559001e-01,  1.785869e-22 },
+        {   98,       21, 8.427854e-01,  4.970776e-43 },
+        {   94,       31, 4.296621e-01,  6.021201e-02 },
+        {  118,       39, 9.823598e-01, 3.851294e-108 },
+        {  196,      121, 6.332332e-01,  6.568822e-01 },
+        {   41,       26, 8.377987e-02,  1.804925e-18 },
+        {    7,        5, 4.714176e-01,  2.658962e-01 },
+        {  199,       98, 2.147335e-01,  7.100855e-18 },
+        {   35,       35, 9.683290e-01,  6.288840e-01 },
+        {   14,        6, 2.079378e-01,  5.199681e-02 },
+        {   80,       69, 3.608725e-01,  2.393712e-20 },
+        {   71,       60, 9.829840e-02,  2.988246e-49 },
+        {   74,       48, 6.387641e-01,  9.042237e-01 },
+        {   89,       69, 3.023493e-01,  4.819660e-20 },
+        {    7,        6, 7.700307e-01,  1.000000e+00 },
+    };
+
+    const size_t nexamples = carray_size(exampledata);
+    for (size_t i = 0; i < nexamples; ++i)
+    {
+        unsigned trials = (unsigned )exampledata[i][0];
+        unsigned successes = (unsigned )exampledata[i][1];
+        double p = exampledata[i][2];
+        static const double tol(0.01);
+        BOOST_CHECK_CLOSE(get_binomial_twosided_exact_pval(p, successes, trials), exampledata[i][3], tol);
+    }
+}
+
+BOOST_AUTO_TEST_CASE( test_binomial_gte_min_count )
+{
+    /*
+    * tests <- 100
+
+    # skew towards lower success rates, similar to what we currently see for indel error rates
+    # skew the tests towards low p-values
+    * test_df <- data.frame(n_trials = round(runif(tests, max = 200)),
+    *                       success_rate = 10^runif(tests, min = -7, max = 0),
+    *                       p_value = 10^runif(tests, min = -10, max = 0))
+    * test_df$min_success <- apply(test_df, 1, function(x) qbinom(p = x["p_value"],
+    *                                                             size = x["n_trials"],
+    *                                                             prob = x["success_rate"],
+    *                                                             lower.tail = FALSE))
+
+    * cat(apply(test_df, 1, function(x) sprintf("{%3d, %.5e, %.5e, %3d},",
+    *                                           x["n_trials"], x["success_rate"],
+    *                                           x["p_value"], x["min_success"])),
+    *     sep = "\n")
+     */
+    //  trials success_rate    p-value  min_count
+    double exampledata[][4] =
+    {
+        {153, 2.57316e-03, 1.90497e-01,   1},
+        { 79, 7.12531e-01, 2.08688e-04,  69},
+        {  2, 2.54527e-01, 8.56125e-02,   1},
+        {143, 2.84603e-04, 1.13570e-07,   3},
+        {  9, 7.06301e-05, 1.37532e-01,   0},
+        {124, 1.45277e-06, 4.07325e-10,   2},
+        {173, 1.07099e-04, 6.43000e-06,   2},
+        {  5, 8.79078e-05, 3.78969e-07,   1},
+        { 55, 7.91582e-05, 8.19433e-02,   0},
+        { 99, 7.32748e-04, 8.09058e-02,   0},
+        {  7, 9.42784e-02, 2.01747e-04,   4},
+        {129, 1.50969e-04, 1.04377e-10,   4},
+        {121, 5.45886e-01, 7.98495e-09,  96},
+        {188, 1.43810e-03, 4.87911e-01,   0},
+        {114, 1.78681e-01, 1.79785e-03,  33},
+        {130, 1.39812e-07, 3.98930e-03,   0},
+        { 97, 5.02856e-03, 3.02772e-07,   7},
+        {183, 1.43512e-05, 8.16882e-09,   2},
+        {185, 2.90423e-03, 2.18749e-05,   5},
+        { 84, 2.50545e-03, 5.12018e-03,   2},
+        {116, 9.91294e-04, 2.63842e-05,   3},
+        {109, 8.89566e-06, 3.43128e-07,   2},
+        { 39, 7.61711e-05, 3.75268e-07,   2},
+        {193, 3.68159e-03, 1.62027e-08,   9},
+        {142, 2.59871e-06, 1.50306e-05,   1},
+        {182, 4.02888e-04, 9.75801e-01,   0},
+        {141, 2.14857e-07, 6.16874e-06,   1},
+        { 15, 9.18554e-07, 4.03038e-05,   0},
+        {149, 2.28428e-07, 2.12637e-04,   0},
+        {191, 2.76298e-07, 6.71069e-09,   1},
+        {107, 1.31901e-04, 1.04454e-09,   4},
+        { 65, 1.85190e-07, 5.78113e-02,   0},
+        {141, 2.66733e-06, 1.44922e-02,   0},
+        {190, 1.03013e-05, 1.53499e-09,   2},
+        { 49, 3.26117e-05, 1.84198e-09,   2},
+        {127, 1.25834e-02, 2.48552e-05,   9},
+        { 42, 4.28448e-07, 7.31442e-08,   1},
+        { 60, 6.90027e-03, 1.05096e-07,   7},
+        {144, 2.90896e-02, 6.76604e-06,  15},
+        {138, 4.18742e-01, 4.95768e-02,  67},
+        {117, 9.73191e-06, 4.88660e-04,   1},
+        { 24, 6.70017e-07, 2.68855e-09,   1},
+        {102, 1.58112e-05, 8.01416e-02,   0},
+        {192, 1.25658e-02, 2.44385e-09,  16},
+        { 50, 3.17352e-03, 7.21933e-09,   6},
+        { 27, 1.00049e-04, 8.91732e-06,   1},
+        { 47, 6.11921e-04, 6.35839e-03,   1},
+        {168, 2.29714e-03, 6.40052e-07,   6},
+        { 73, 1.26782e-06, 3.10391e-10,   2},
+        {195, 1.88220e-04, 8.98213e-03,   1},
+        {  1, 1.92718e-07, 3.95304e-06,   0},
+        {155, 7.96789e-06, 5.51888e-01,   0},
+        { 20, 1.71476e-01, 5.86904e-10,  16},
+        {  1, 2.43012e-04, 1.06529e-05,   1},
+        {149, 1.07803e-07, 7.93280e-09,   1},
+        { 11, 4.73269e-05, 1.00482e-10,   2},
+        { 72, 8.34266e-01, 6.74543e-09,  72},
+        {198, 4.17837e-05, 8.69572e-02,   0},
+        {142, 3.85465e-07, 6.11033e-04,   0},
+        {105, 1.42848e-02, 1.19440e-08,  12},
+        {161, 3.83509e-04, 7.23564e-03,   1},
+        {103, 9.24136e-04, 3.22721e-08,   5},
+        {107, 1.13859e-05, 9.39218e-01,   0},
+        {  4, 5.05847e-07, 1.06546e-01,   0},
+        {184, 6.77215e-03, 2.78220e-10,  13},
+        {142, 1.20315e-01, 7.06077e-06,  36},
+        {113, 1.53915e-02, 6.81864e-07,  11},
+        {151, 5.17850e-07, 8.65573e-04,   0},
+        { 87, 3.40905e-05, 6.05011e-09,   2},
+        {106, 8.37584e-03, 2.17082e-02,   3},
+        { 14, 7.23873e-07, 2.66869e-04,   0},
+        { 40, 1.44629e-06, 1.74549e-04,   0},
+        {196, 2.15504e-06, 4.37458e-06,   1},
+        { 75, 1.74346e-02, 1.41908e-04,   7},
+        { 14, 5.88490e-04, 1.12071e-04,   1},
+        {175, 9.32951e-03, 1.76643e-07,  11},
+        { 39, 1.99337e-03, 3.16462e-05,   3},
+        { 15, 5.46313e-01, 1.00483e-04,  15},
+        { 24, 6.67693e-01, 3.52558e-04,  23},
+        {129, 2.56853e-03, 4.44043e-04,   3},
+        { 99, 4.41006e-05, 2.10157e-01,   0},
+        {185, 4.27154e-04, 8.46782e-03,   1},
+        { 39, 6.18477e-03, 2.52357e-02,   1},
+        {167, 3.26215e-02, 1.57803e-10,  25},
+        {101, 5.79464e-04, 5.95059e-09,   4},
+        { 13, 9.57207e-07, 1.10881e-03,   0},
+        {124, 1.96723e-04, 7.19052e-02,   0},
+        {149, 3.75192e-04, 2.53588e-06,   3},
+        {103, 8.99213e-06, 3.28940e-03,   0},
+        {121, 4.10346e-03, 9.68812e-01,   0},
+        {196, 7.97230e-07, 3.46923e-01,   0},
+        {  9, 5.55411e-04, 9.63213e-07,   2},
+        {134, 2.16316e-04, 2.82896e-07,   3},
+        { 99, 1.03844e-05, 2.72108e-06,   1},
+        {140, 1.71302e-03, 2.48607e-01,   0},
+        {196, 1.60494e-04, 7.52108e-10,   4},
+        {147, 1.10965e-05, 4.24813e-01,   0},
+        { 34, 5.76217e-03, 7.92384e-09,   6},
+        {153, 1.05309e-07, 2.31016e-01,   0},
+        {112, 1.39439e-04, 5.75978e-09,   3}
+    };
+
+    const size_t nexamples = carray_size(exampledata);
+    for (size_t i = 0; i < nexamples; ++i)
+    {
+        unsigned trials       = (unsigned )exampledata[i][0];
+        double   success_rate = exampledata[i][1];
+        double   p_val        = exampledata[i][2];
+        BOOST_CHECK_EQUAL((unsigned )min_count_binomial_gte_exact(p_val, success_rate, trials), 1+exampledata[i][3]);
+    }
+}
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/boost_icl_test.cpp b/src/c++/lib/blt_util/test/boost_icl_test.cpp
new file mode 100644
index 0000000..3acee25
--- /dev/null
+++ b/src/c++/lib/blt_util/test/boost_icl_test.cpp
@@ -0,0 +1,152 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "boost/icl/discrete_interval.hpp"
+#include "boost/icl/interval_map.hpp"
+
+#include <set>
+#include <string>
+
+using namespace boost::icl;
+
+
+BOOST_AUTO_TEST_SUITE( test_boost_icl )
+
+
+BOOST_AUTO_TEST_CASE( boost_icl_test_intersect )
+{
+    discrete_interval<int> interval1 = interval<int>::right_open(3,7);
+    discrete_interval<int> interval2 = interval<int>::right_open(7,8);
+    discrete_interval<int> interval3 = interval<int>::right_open(6,8);
+
+    BOOST_REQUIRE(! intersects(interval1,interval2));
+    BOOST_REQUIRE( intersects(interval1,interval3));
+}
+
+
+BOOST_AUTO_TEST_CASE( boost_icl_test_map )
+{
+    typedef std::set<std::string> test_val;
+    typedef interval_map<int,test_val> map_t;
+    map_t test_map;
+    test_val foo,bar;
+    foo.insert("foo");
+    bar.insert("bar");
+
+
+    // commenting out this code so that we can compile with clang, apparently
+    // clang and boost::icl disagree -- not worth fixing this unless icl
+    // is used in a production code path
+    //
+
+#if 0
+    test_map.add(std::make_pair(interval<int>::right_open(3,7), foo));
+    test_map.add(std::make_pair(interval<int>::right_open(6,8), bar));
+
+    {
+        map_t::const_iterator begin(test_map.find(5));
+        map_t::const_iterator end(test_map.find(7));
+        for (; begin!=end; ++begin)
+        {
+            std::cerr << "CCC" << begin->first;
+            for (const std::string& s : begin->second)
+            {
+                std::cerr << " " << s;
+            }
+            std::cerr << "\n";
+        }
+    }
+
+    test_map.erase(std::make_pair(interval<int>::right_open(6,8), bar));
+    {
+        map_t::const_iterator begin(test_map.find(5));
+        map_t::const_iterator end(test_map.find(7));
+        for (; begin!=end; ++begin)
+        {
+            std::cerr << "CCC" << begin->first;
+            for (const std::string& s : begin->second)
+            {
+                std::cerr << " " << s;
+            }
+            std::cerr << "\n";
+        }
+    }
+#endif
+}
+
+
+
+BOOST_AUTO_TEST_CASE( boost_icl_test_map2 )
+{
+    typedef interval_map<int,unsigned> map_t;
+    map_t test_map;
+
+    test_map.add(std::make_pair(interval<int>::right_open(3,7),1u));
+    test_map.add(std::make_pair(interval<int>::right_open(4,5),2u));
+    test_map.add(std::make_pair(interval<int>::right_open(6,8),1u));
+
+    BOOST_REQUIRE_EQUAL(test_map.iterative_size(),5u);
+
+    auto b(test_map.begin());
+    BOOST_REQUIRE_EQUAL(b->first.lower(),3);
+    BOOST_REQUIRE_EQUAL(b->first.upper(),4);
+    BOOST_REQUIRE_EQUAL(b->second,1u);
+    ++b;
+    BOOST_REQUIRE_EQUAL(b->first.lower(),4);
+    BOOST_REQUIRE_EQUAL(b->first.upper(),5);
+    BOOST_REQUIRE_EQUAL(b->second,3u);
+    ++b;
+    BOOST_REQUIRE_EQUAL(b->first.lower(),5);
+    BOOST_REQUIRE_EQUAL(b->first.upper(),6);
+    BOOST_REQUIRE_EQUAL(b->second,1u);
+    ++b;
+    BOOST_REQUIRE_EQUAL(b->first.lower(),6);
+    BOOST_REQUIRE_EQUAL(b->first.upper(),7);
+    BOOST_REQUIRE_EQUAL(b->second,2u);
+    ++b;
+    BOOST_REQUIRE_EQUAL(b->first.lower(),7);
+    BOOST_REQUIRE_EQUAL(b->first.upper(),8);
+    BOOST_REQUIRE_EQUAL(b->second,1u);
+}
+
+BOOST_AUTO_TEST_CASE( boost_icl_test_map3 )
+{
+    // recreates ICL bug reported for OS X 10.9/clang 3.5 and boost 1.53
+    // (probably) related: https://github.com/Astron/Astron/issues/213
+    // intent is to test for an assertion from boost ICL, boost value
+    // tests below are just placeholders.
+    //
+    // known to pass with boost 1.56+ and fail in boost 1.55-
+    //
+
+    typedef interval_map<int,unsigned> map_t;
+    map_t test_map;
+
+    test_map.add(std::make_pair(interval<int>::right_open(3,5),1u));
+    test_map.add(std::make_pair(interval<int>::right_open(8,9),2u));
+    test_map.add(std::make_pair(interval<int>::right_open(1,12),1u));
+
+    BOOST_REQUIRE_EQUAL(test_map.iterative_size(),5u);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/compat_util_test.cpp b/src/c++/lib/blt_util/test/compat_util_test.cpp
new file mode 100644
index 0000000..92f8206
--- /dev/null
+++ b/src/c++/lib/blt_util/test/compat_util_test.cpp
@@ -0,0 +1,83 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "compat_util.hh"
+
+#include <string>
+
+
+BOOST_AUTO_TEST_SUITE( compat_util )
+
+static
+void
+single_test_round(const double input,
+                  const double expect)
+{
+
+    static const double eps(0.00001);
+    BOOST_CHECK_CLOSE(compat_round(input), expect, eps);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_round )
+{
+    single_test_round(3.5,4.0);
+    single_test_round(3.2,3.0);
+    single_test_round(3.7,4.0);
+    single_test_round(-1.0,-1.0);
+    single_test_round(-1.2,-1.0);
+    single_test_round(-1.5,-2.0);
+    single_test_round(-1.7,-2.0);
+}
+
+
+
+static
+void
+single_test_basename(const char* input,
+                     const char* expect)
+{
+
+    const char* result(compat_basename(input));
+    BOOST_CHECK_EQUAL(std::string(result), std::string(expect));
+}
+
+
+BOOST_AUTO_TEST_CASE( test_basename )
+{
+    single_test_basename("foo","foo");
+    single_test_basename("","");
+
+#ifdef _WIN32
+    single_test_basename("\\foo","foo");
+    single_test_basename("\\\\","");
+    single_test_basename("\\","");
+#else
+    single_test_basename("/foo","foo");
+    single_test_basename("//","");
+    single_test_basename("/","");
+#endif
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/depth_buffer_test.cpp b/src/c++/lib/blt_util/test/depth_buffer_test.cpp
new file mode 100644
index 0000000..48c852e
--- /dev/null
+++ b/src/c++/lib/blt_util/test/depth_buffer_test.cpp
@@ -0,0 +1,118 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "depth_buffer.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_depth_buffer )
+
+
+/// return buffer loaded with simple test pattern
+///
+/// at pos 10Y, depth is Y
+///
+static
+depth_buffer
+get_db_test_pattern()
+{
+    depth_buffer db;
+
+    // load a depth pattern in:
+    for (unsigned i(100); i<110; ++i)
+    {
+        for (unsigned j(i+1); j<110; ++j)
+        {
+            db.inc(j);
+        }
+    }
+    return db;
+}
+
+
+/// return buffer loaded with simple test pattern
+///
+/// at pos Y, depth is Y-100 before compression
+///
+static
+depth_buffer_compressible
+get_db_compressible_test_pattern(
+    const unsigned compressionLevel)
+{
+    depth_buffer_compressible db(compressionLevel);
+
+    // load a depth pattern in:
+    for (unsigned i(101); i<200; ++i)
+    {
+        db.inc(i,200-i);
+    }
+    return db;
+}
+
+
+BOOST_AUTO_TEST_CASE( test_depth_buffer_val )
+{
+    depth_buffer db(get_db_test_pattern());
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(109)),9);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_depth_buffer_clear )
+{
+    depth_buffer db(get_db_test_pattern());
+    db.clear_pos(109);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(109)),0);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_depth_buffer_range )
+{
+    depth_buffer db(get_db_test_pattern());
+    BOOST_REQUIRE(! db.is_range_ge_than(0,107,8));
+    BOOST_REQUIRE(  db.is_range_ge_than(0,108,8));
+}
+
+BOOST_AUTO_TEST_CASE( test_depth_buffer_compressible_val )
+{
+    depth_buffer_compressible db(get_db_compressible_test_pattern(8));
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(107)),8);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(110)),8);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(150)),48);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(199)),96);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_depth_buffer_compressible_clear )
+{
+    depth_buffer_compressible db(get_db_compressible_test_pattern(8));
+    for (unsigned i(100); i<120; ++i)
+    {
+        db.clear_pos(i);
+    }
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(110)),0);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(150)),48);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(199)),96);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(db.val(109)),0);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/id_map_test.cpp b/src/c++/lib/blt_util/test/id_map_test.cpp
new file mode 100644
index 0000000..df34eb1
--- /dev/null
+++ b/src/c++/lib/blt_util/test/id_map_test.cpp
@@ -0,0 +1,70 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "id_map.hh"
+
+#include <string>
+
+
+BOOST_AUTO_TEST_SUITE( test_id_map )
+
+
+BOOST_AUTO_TEST_CASE( test_id_set )
+{
+    id_set<std::string> iset;
+
+    iset.insert_key("brown");
+    iset.insert_key("fox");
+
+    BOOST_REQUIRE_EQUAL(iset.test_key("brown"),true);
+    BOOST_REQUIRE_EQUAL(iset.test_key("x"),false);
+
+    const unsigned expect_id(1);
+
+    BOOST_REQUIRE_EQUAL(iset.get_id("fox"),expect_id);
+    BOOST_REQUIRE_EQUAL(iset.get_key(expect_id),std::string("fox"));
+}
+
+
+BOOST_AUTO_TEST_CASE( test_id_map )
+{
+    id_map<std::string,std::string> imap;
+
+    imap.insert("brown","123");
+    imap.insert("fox","456");
+
+    BOOST_REQUIRE_EQUAL(imap.test_key("brown"),true);
+    BOOST_REQUIRE_EQUAL(imap.test_key("x"),false);
+
+    // test key replacement:
+    imap.insert("brown","789");
+    BOOST_REQUIRE_EQUAL(imap.get_value(imap.get_id("brown")),std::string("789"));
+
+    const unsigned expect_id(1);
+
+    BOOST_REQUIRE_EQUAL(imap.get_id("fox"),expect_id);
+    BOOST_REQUIRE_EQUAL(imap.get_key(expect_id),std::string("fox"));
+    BOOST_REQUIRE_EQUAL(imap.get_value(expect_id),std::string("456"));
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/io_util_test.cpp b/src/c++/lib/blt_util/test/io_util_test.cpp
new file mode 100644
index 0000000..d08128f
--- /dev/null
+++ b/src/c++/lib/blt_util/test/io_util_test.cpp
@@ -0,0 +1,61 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "io_util.hh"
+
+#include <iomanip>
+#include <sstream>
+
+
+BOOST_AUTO_TEST_SUITE( test_io_util )
+
+
+BOOST_AUTO_TEST_CASE( test_StreamScoper )  // stream formatting changed while a StreamScoper is alive must be undone when it goes out of scope
+{
+    static const double f(0.123456789);
+    static const std::string strf4("0.1235");  // f printed at fixed precision 4
+    static const std::string strf2("0.12");  // f printed at fixed precision 2
+
+    std::ostringstream oss;
+
+    oss <<  std::fixed << std::setprecision(4);
+
+    oss << f;
+    BOOST_REQUIRE_EQUAL(strf4,oss.str());
+
+    oss.str("");  // empties the buffer only; format state is untouched
+
+    {
+        StreamScoper ss(oss);  // guard covering the precision change made inside this scope
+        oss <<  std::fixed << std::setprecision(2);
+        oss << f;
+        BOOST_REQUIRE_EQUAL(strf2,oss.str());
+
+        oss.str("");
+    }
+
+    oss << f;
+    BOOST_REQUIRE_EQUAL(strf4,oss.str());  // precision 4 is expected to be back in effect here
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/istream_line_splitter_test.cpp b/src/c++/lib/blt_util/test/istream_line_splitter_test.cpp
new file mode 100644
index 0000000..10698e0
--- /dev/null
+++ b/src/c++/lib/blt_util/test/istream_line_splitter_test.cpp
@@ -0,0 +1,94 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "istream_line_splitter.hh"
+
+#include <sstream>
+#include <string>
+
+
+BOOST_AUTO_TEST_SUITE( test_istream_line_splitter )
+
+
+BOOST_AUTO_TEST_CASE( test_istream_line_splitter_parse )  // parses two tab-delimited lines and checks column count and one word per line
+{
+
+    std::string test_input("1\t2\t3\t4\n11\t22\t33\t44\n");  // two lines, four tab-separated columns each
+    std::istringstream iss(test_input);
+
+    istream_line_splitter dparse(iss);
+
+    int line_no(0);  // 1-based count of lines parsed so far
+    while (dparse.parse_line())
+    {
+        line_no++;
+        static const unsigned expected_col_count(4);
+        BOOST_CHECK_EQUAL(dparse.n_word(),expected_col_count);
+        if       (1==line_no)
+        {
+            BOOST_CHECK_EQUAL(std::string(dparse.word[1]),std::string("2"));  // second column of line 1
+        }
+        else if (2==line_no)
+        {
+            BOOST_CHECK_EQUAL(std::string(dparse.word[1]),std::string("22"));  // second column of line 2
+        }
+    }
+}
+
+
+static
+void
+check_long_line(const int init_buffer_size)  // helper: parse a fixed two-line input with the given initial buffer size and check the words
+{
+
+    std::string test_input("1ABCDEFGHIJKLMNOPQRSTUVWXYZ\t2\t3\t4ABCDEFG\n11\t22\t33\t44XYZ\n");  // first line is 41 characters long, excluding the newline
+    std::istringstream iss(test_input);
+
+    istream_line_splitter dparse(iss,init_buffer_size);
+
+    int line_no(0);  // 1-based count of lines parsed so far
+    while (dparse.parse_line())
+    {
+        line_no++;
+        static const unsigned expected_col_count(4);
+        BOOST_CHECK_EQUAL(dparse.n_word(),expected_col_count);
+        if       (1==line_no)
+        {
+            BOOST_CHECK_EQUAL(std::string(dparse.word[0]),std::string("1ABCDEFGHIJKLMNOPQRSTUVWXYZ"));  // first and last words of the long line must be intact
+            BOOST_CHECK_EQUAL(std::string(dparse.word[3]),std::string("4ABCDEFG"));
+        }
+        else if (2==line_no)
+        {
+            BOOST_CHECK_EQUAL(std::string(dparse.word[2]),std::string("33"));  // the following line must still parse correctly
+        }
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_istream_line_splitter_long_line )  // runs check_long_line with two different initial buffer sizes
+{
+    check_long_line(2);  // far smaller than the 41-character first line
+    check_long_line(41);  // equal to the first line's length without its newline -- presumably a boundary case, confirm in istream_line_splitter.hh
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/known_pos_range2_test.cpp b/src/c++/lib/blt_util/test/known_pos_range2_test.cpp
new file mode 100644
index 0000000..1b063bb
--- /dev/null
+++ b/src/c++/lib/blt_util/test/known_pos_range2_test.cpp
@@ -0,0 +1,92 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/known_pos_range2.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_known_pos_range2 )
+
+
+BOOST_AUTO_TEST_CASE( test_known_pos_range2_is_pos_intersect )  // single-position intersection at and just outside both range edges
+{
+    // this corresponds to zero-index range [9,19] :
+    const known_pos_range2 pr(9,20);
+
+    BOOST_REQUIRE(! pr.is_pos_intersect(8));  // one before the first position
+    BOOST_REQUIRE(  pr.is_pos_intersect(9));  // first position
+    BOOST_REQUIRE(  pr.is_pos_intersect(19));  // last position
+    BOOST_REQUIRE(! pr.is_pos_intersect(20));  // the end value itself is excluded
+}
+
+
+BOOST_AUTO_TEST_CASE( test_pos_range_semibound_is_pos_intersect )  // same position checks on a range where only the end is set explicitly
+{
+    // this corresponds to zero-index range [-inf,19] :
+    known_pos_range2 pr;  // default-constructed; begin is left at its default -- NOTE(review): confirm that default against the "-inf" comment above
+    pr.set_end_pos(20);
+
+    BOOST_REQUIRE(  pr.is_pos_intersect(8));
+    BOOST_REQUIRE(  pr.is_pos_intersect(9));
+    BOOST_REQUIRE(  pr.is_pos_intersect(19));
+    BOOST_REQUIRE(! pr.is_pos_intersect(20));  // the end value itself is excluded
+}
+
+BOOST_AUTO_TEST_CASE( test_known_pos_range2_is_range_intersect )  // range-vs-range intersection: touching, overlapping, enclosing and enclosed ranges
+{
+    // this corresponds to zero-index range [9,19] :
+    const known_pos_range2 pr(9,20);
+
+    // left-side:
+    BOOST_REQUIRE(! pr.is_range_intersect(known_pos_range2(0,9)));  // ends exactly where pr begins: no overlap
+    BOOST_REQUIRE(  pr.is_range_intersect(known_pos_range2(0,10)));  // shares position 9
+
+    // right side:
+    BOOST_REQUIRE(  pr.is_range_intersect(known_pos_range2(19,30)));  // shares position 19
+    BOOST_REQUIRE(! pr.is_range_intersect(known_pos_range2(20,30)));  // begins exactly where pr ends: no overlap
+
+    // superset:
+    BOOST_REQUIRE(  pr.is_range_intersect(known_pos_range2(0,30)));
+
+    // subset:
+    BOOST_REQUIRE(  pr.is_range_intersect(known_pos_range2(12,15)));
+}
+
+
+BOOST_AUTO_TEST_CASE( test_known_pos_range2_intersect_window )  // is_intersect_window: two separated ranges intersect only once the window is large enough
+{
+    // this corresponds to zero-index range [9,19] :
+    const known_pos_range2 pr1(9,20);
+    const known_pos_range2 pr2(30,40);  // begins 10 past pr1's end value
+
+    BOOST_REQUIRE(! is_intersect_window(pr1, pr2));  // no window argument: the ranges are disjoint
+    BOOST_REQUIRE(! is_intersect_window(pr2, pr1));  // argument order must not matter
+
+    BOOST_REQUIRE(! is_intersect_window(pr1, pr2, 10));  // window of 10 is still not enough
+    BOOST_REQUIRE(! is_intersect_window(pr2, pr1, 10));
+
+    BOOST_REQUIRE(is_intersect_window(pr1, pr2, 11));  // window of 11 is the first that counts as intersecting
+    BOOST_REQUIRE(is_intersect_window(pr2, pr1, 11));
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/math_util_test.cpp b/src/c++/lib/blt_util/test/math_util_test.cpp
new file mode 100644
index 0000000..ca09d33
--- /dev/null
+++ b/src/c++/lib/blt_util/test/math_util_test.cpp
@@ -0,0 +1,83 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "math_util.hh"
+
+#include <cmath>
+
+
+BOOST_AUTO_TEST_SUITE( math_util )
+
+static
+void
+single_log_sum_test(const double x1,  // helper: log_sum(ln x1, ln x2) must match ln(x1+x2)
+                    const double x2)
+{
+    static const double eps(0.00001);  // tolerance for BOOST_REQUIRE_CLOSE, which Boost.Test documents as a percentage -- confirm
+
+    const double expect(std::log(x1+x2));  // reference value computed directly
+
+    const double lnx1(std::log(x1));
+    const double lnx2(std::log(x2));
+    const double result(log_sum(lnx1,lnx2));  // function under test takes both inputs already in log space
+
+    BOOST_REQUIRE_CLOSE(result, expect, eps);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_log_sum )  // exercises log_sum on three input pairs via single_log_sum_test
+{
+
+    single_log_sum_test(0.5,0.2);  // inputs of similar magnitude
+    single_log_sum_test(0.00001,0.00000001);  // inputs three orders of magnitude apart
+    single_log_sum_test(1,1);  // equal inputs, both logs are 0
+}
+
+
+static
+void
+medianChecker(  // helper: both median implementations must return trueVal for nums
+    const int trueVal,
+    const std::vector<int>& nums)
+{
+    std::vector<int> copy1(nums);  // each call gets its own copy -- presumably because the median routines reorder their input, confirm in math_util.hh
+    BOOST_REQUIRE_EQUAL(trueVal,_ps_median(std::begin(copy1), std::end(copy1)));
+    std::vector<int> copy2(nums);
+    BOOST_REQUIRE_EQUAL(trueVal,_ne_median(std::begin(copy2), std::end(copy2)));
+}
+
+
+BOOST_AUTO_TEST_CASE( test_median )  // median of two unsorted, odd-length inputs
+{
+    {
+        std::vector<int> nums = {9,8,7,0,2,1,3};  // sorted: 0 1 2 3 7 8 9
+        medianChecker(3,nums);
+    }
+    {
+        std::vector<int> nums  = {9,8,7,0,2,1,3,10,11};  // sorted: 0 1 2 3 7 8 9 10 11
+        medianChecker(7,nums);
+    }
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/observer_test.cpp b/src/c++/lib/blt_util/test/observer_test.cpp
new file mode 100644
index 0000000..5918d51
--- /dev/null
+++ b/src/c++/lib/blt_util/test/observer_test.cpp
@@ -0,0 +1,139 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "observer.hh"
+
+#include <string>
+
+BOOST_AUTO_TEST_SUITE( observer_util )
+
+
+struct notifier_test : public notifier<int>  // test notifier that sends a fixed value to its observers
+{
+    void
+    increment_observers() const
+    {
+        notify_observers(2);  // every call notifies with the value 2; the tests below rely on that
+    }
+};
+
+
+
+struct observer_test : public observer<int>  // test observer that accumulates every notified value into val
+{
+    observer_test() :
+        val(0)
+    {}
+
+    void
+    watch(const notifier<int>& n)  // public wrapper around observe_notifier so tests can register notifiers
+    {
+        observe_notifier(n);
+    }
+
+private:
+    void
+    recieve_notification(const notifier<int>&,  // NOTE(review): spelling ("recieve") presumably has to match the declaration in observer.hh -- do not rename here alone
+                         const int& n)
+    {
+        val += n;  // running total of all values received
+    }
+
+public:
+    int val;  // sum of notifications received so far, starts at 0
+};
+
+
+
+
+BOOST_AUTO_TEST_CASE( test_simple_observer )  // one observer watching one notifier
+{
+    observer_test obs1;
+    notifier_test not1;
+
+    obs1.watch(not1);
+
+    BOOST_REQUIRE_EQUAL(obs1.val,0);  // registering alone delivers nothing
+    not1.increment_observers();
+    not1.increment_observers();
+
+    BOOST_REQUIRE_EQUAL(obs1.val,4);  // two notifications of 2 each
+}
+
+BOOST_AUTO_TEST_CASE( test_multi_observer )  // two observers and two notifiers with overlapping subscriptions
+{
+    observer_test obs1;
+    observer_test obs2;
+    notifier_test not1;
+    notifier_test not2;
+
+    obs1.watch(not1);  // obs1 watches both notifiers
+    obs1.watch(not2);
+
+    obs2.watch(not1);  // obs2 watches only not1
+
+    BOOST_REQUIRE_EQUAL(obs1.val,0);
+    BOOST_REQUIRE_EQUAL(obs2.val,0);
+
+    not1.increment_observers();
+    not2.increment_observers();
+
+    BOOST_REQUIRE_EQUAL(obs1.val,4);  // 2 from not1 plus 2 from not2
+    BOOST_REQUIRE_EQUAL(obs2.val,2);  // 2 from not1 only
+}
+
+BOOST_AUTO_TEST_CASE( test_notifier_copy )  // copies of a notifier must still reach the original's observers
+{
+    // copy behavior for notifier is designed for sane behavior in stl
+    // structures, test this here
+    observer_test obs1;
+    observer_test obs2;
+    std::vector<notifier_test> notvec(2);
+
+    obs1.watch(notvec[0]);  // obs1 watches both elements
+    obs1.watch(notvec[1]);
+
+    obs2.watch(notvec[0]);  // obs2 watches only the first element
+
+    BOOST_REQUIRE_EQUAL(obs1.val,0);
+    BOOST_REQUIRE_EQUAL(obs2.val,0);
+
+    notvec[0].increment_observers();
+    notvec[1].increment_observers();
+
+    BOOST_REQUIRE_EQUAL(obs1.val,4);
+    BOOST_REQUIRE_EQUAL(obs2.val,2);
+
+    // simulate an alloc'd resize:
+    std::vector<notifier_test> notvec2(notvec);  // copy-construct every notifier ...
+    notvec.clear();  // ... then destroy the originals
+
+    notvec2[0].increment_observers();
+    notvec2[1].increment_observers();
+
+    BOOST_REQUIRE_EQUAL(obs1.val,8);  // the copies delivered the same notifications again: 4 + 4
+    BOOST_REQUIRE_EQUAL(obs2.val,4);  // 2 + 2
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/parse_util_test.cpp b/src/c++/lib/blt_util/test/parse_util_test.cpp
new file mode 100644
index 0000000..fec13a2
--- /dev/null
+++ b/src/c++/lib/blt_util/test/parse_util_test.cpp
@@ -0,0 +1,251 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "parse_util.hh"
+
+#include <cmath>
+
+#include <string>
+
+BOOST_AUTO_TEST_SUITE( parse_util )
+
+using namespace illumina::blt_util;
+
+
+//
+// check int parsing
+//
+BOOST_AUTO_TEST_CASE( test_parse_int )  // parse_int on a plain single-digit C string
+{
+    const char* two = "2";
+    const int val(parse_int(two));
+    BOOST_REQUIRE_EQUAL(val, 2);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_int_big )  // parse_int must throw on a value too large to represent
+{
+    const char* twobig = "20000000000000000000";  // 2e19, larger than even a 64-bit signed maximum
+    BOOST_REQUIRE_THROW(parse_int(twobig), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_int_small )  // parse_int must throw on a value too negative to represent
+{
+    const char* twosmall = "-20000000000000000000";  // -2e19, below even a 64-bit signed minimum
+    BOOST_REQUIRE_THROW(parse_int(twosmall), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_int_empty )  // parse_int must throw on an empty string
+{
+    const char* empty = "";
+    BOOST_REQUIRE_THROW(parse_int(empty), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_int_tolerate_suffix )  // the C-string parse_int accepts trailing non-numeric characters
+{
+    const char* suffix = "123abc";
+    const int val(parse_int(suffix));  // expected to parse the leading digits and not throw
+    BOOST_REQUIRE_EQUAL(val,123);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_int_str )  // parse_int_str: the std::string variant of parse_int
+{
+    static const char two[] = "2";
+    const int val(parse_int_str(std::string(two)));
+    BOOST_REQUIRE_EQUAL(val, 2);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_int_str_bad_input )  // parse_int_str must throw when the string holds no number at all
+{
+    static const std::string junk("ABCD");
+    BOOST_REQUIRE_THROW(parse_int_str(junk), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_int_rval )  // parse_int_rvalue can be called directly on a string literal
+{
+    const int val(parse_int_rvalue("2"));
+    BOOST_REQUIRE_EQUAL(val, 2);
+}
+
+
+//
+// check long parsing
+//
+
+// this throws on VS 2013 win64, no other win variants tested
+#ifndef _WIN32
+
+BOOST_AUTO_TEST_CASE( test_parse_long )  // parse_long on the largest 64-bit signed value; compiled out on Windows by the surrounding #ifndef
+{
+    const char* two = "9223372036854775807";  // 2^63 - 1 (the variable name "two" is a leftover, not the value)
+    const long val(parse_long(two));
+    BOOST_REQUIRE_EQUAL(val, 9223372036854775807l);
+}
+
+#endif
+
+BOOST_AUTO_TEST_CASE( test_parse_long_big )  // parse_long must throw just past the 64-bit signed maximum
+{
+    const char* twobig = "9223372036854775808";  // 2^63, one more than the value accepted in test_parse_long
+    BOOST_REQUIRE_THROW(parse_long(twobig), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_long_small )  // parse_long must throw on a hugely negative value
+{
+    const char* twosmall = "-20000000000000000000000000000000000000000000000000000";  // far below any 64-bit signed minimum
+    BOOST_REQUIRE_THROW(parse_long(twosmall), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_long_empty )  // parse_long must throw on an empty string
+{
+    const char* empty = "";
+    BOOST_REQUIRE_THROW(parse_long(empty), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_long_str )  // parse_long_str: the std::string variant of parse_long
+{
+    static const char two[] = "2";
+    const long val(parse_long_str(std::string(two)));
+    BOOST_REQUIRE_EQUAL(val, 2l);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_long_rval )  // parse_long_rvalue can be called directly on a string literal
+{
+    const long val(parse_long_rvalue("2"));
+    BOOST_REQUIRE_EQUAL(val, 2l);
+}
+
+
+//
+// check unsigned parsing
+//
+BOOST_AUTO_TEST_CASE( test_parse_unsigned )  // parse_unsigned on a plain single-digit C string
+{
+    const char* two = "2";
+    const unsigned val(parse_unsigned(two));
+    BOOST_REQUIRE_EQUAL(val, 2u);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_unsigned_big )  // parse_unsigned must throw on a value too large to represent
+{
+    const char* twobig = "20000000000000000000";  // 2e19, larger than even a 64-bit unsigned maximum
+    BOOST_REQUIRE_THROW(parse_unsigned(twobig), std::exception);
+}
+
+// this doesn't throw on VS 2013 win64, no other win variants tested
+#ifndef _WIN32
+
+BOOST_AUTO_TEST_CASE( test_parse_unsigned_small )  // parse_unsigned must reject a negative number; compiled out on Windows by the surrounding #ifndef
+{
+    const char* twosmall = "-2";
+    BOOST_REQUIRE_THROW(parse_unsigned(twosmall), std::exception);
+}
+
+#endif
+
+BOOST_AUTO_TEST_CASE( test_parse_unsigned_empty )  // parse_unsigned must throw on an empty string
+{
+    const char* empty = "";
+    BOOST_REQUIRE_THROW(parse_unsigned(empty), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_unsigned_tolerate_suffix )  // the C-string parse_unsigned accepts trailing non-numeric characters
+{
+    const char* suffix = "123abc";
+    const unsigned val(parse_unsigned(suffix));  // expected to parse the leading digits and not throw
+    BOOST_REQUIRE_EQUAL(val,123u);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_unsigned_str )  // parse_unsigned_str: the std::string variant of parse_unsigned
+{
+    static const char two[] = "2";
+    const unsigned val(parse_unsigned_str(std::string(two)));
+    BOOST_REQUIRE_EQUAL(val, 2u);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_unsigned_str_bad_input )  // parse_unsigned_str must throw when the string holds no number at all
+{
+    static const std::string junk("ABCD");
+    BOOST_REQUIRE_THROW(parse_unsigned_str(junk), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_unsigned_rval )  // parse_unsigned_rvalue can be called directly on a string literal
+{
+    const unsigned val(parse_unsigned_rvalue("2"));
+    BOOST_REQUIRE_EQUAL(val, 2u);
+}
+
+
+
+//
+// check double parsing
+//
+const double tol(0.0001);  // shared tolerance for the BOOST_REQUIRE_CLOSE checks below; Boost.Test documents this argument as a percentage -- confirm
+
+BOOST_AUTO_TEST_CASE( test_parse_double )  // parse_double on a plain decimal C string
+{
+    const char* two = "2.0";
+    const double val(parse_double(two));
+    BOOST_REQUIRE_CLOSE(val, 2.0, tol);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_double_exp )  // parse_double on exponent notation with a large magnitude
+{
+    const char* big = "2.0e+100";
+    const double val(parse_double(big));
+    BOOST_REQUIRE_CLOSE(val, 2.0e+100, tol);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_double_inf )  // the literal text "Infinity" must parse to an infinite double rather than throw
+{
+    const char* inf = "Infinity";
+    const double val(parse_double(inf));
+    BOOST_REQUIRE(std::isinf(val));
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_double_str )  // parse_double_str: the std::string variant of parse_double
+{
+    const char* two = "2.0";
+    const double val(parse_double_str(std::string(two)));
+    BOOST_REQUIRE_CLOSE(val, 2.0, tol);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_double_str_bad_input )  // parse_double_str must throw when the string holds no number at all
+{
+    static const std::string junk("ABCD");
+    BOOST_REQUIRE_THROW(parse_double_str(junk), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_double_str_bad_input2 )  // parse_double_str must throw on an empty string
+{
+    static const std::string junk("");
+    BOOST_REQUIRE_THROW(parse_double_str(junk), std::exception);
+}
+
+BOOST_AUTO_TEST_CASE( test_parse_double_rval )  // parse_double_rvalue can be called directly on a string literal
+{
+    const double val(parse_double_rvalue("2.0"));
+    BOOST_REQUIRE_CLOSE(val, 2.0, tol);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/pos_range_test.cpp b/src/c++/lib/blt_util/test/pos_range_test.cpp
new file mode 100644
index 0000000..172ed42
--- /dev/null
+++ b/src/c++/lib/blt_util/test/pos_range_test.cpp
@@ -0,0 +1,96 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/pos_range.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_pos_range )
+
+
+BOOST_AUTO_TEST_CASE( test_pos_range_is_pos_intersect )  // single-position intersection at and just outside both range edges
+{
+    // this corresponds to zero-index range [9,19] :
+    const pos_range pr(9,20);
+
+    BOOST_REQUIRE(! pr.is_pos_intersect(8));  // one before the first position
+    BOOST_REQUIRE(  pr.is_pos_intersect(9));  // first position
+    BOOST_REQUIRE(  pr.is_pos_intersect(19));  // last position
+    BOOST_REQUIRE(! pr.is_pos_intersect(20));  // the end value itself is excluded
+}
+
+
+BOOST_AUTO_TEST_CASE( test_pos_range_semibound_is_pos_intersect )  // range with only its end set: everything below the end intersects
+{
+    // this corresponds to zero-index range [-inf,19] :
+    pos_range pr;  // default-constructed, begin is never set
+    pr.set_end_pos(20);
+
+    BOOST_REQUIRE(  pr.is_pos_intersect(8));
+    BOOST_REQUIRE(  pr.is_pos_intersect(9));
+    BOOST_REQUIRE(  pr.is_pos_intersect(19));
+    BOOST_REQUIRE(! pr.is_pos_intersect(20));  // the end value itself is excluded
+}
+
+BOOST_AUTO_TEST_CASE( test_pos_range_is_range_intersect )  // range-vs-range intersection: touching, overlapping, enclosing and enclosed ranges
+{
+    // this corresponds to zero-index range [9,19] :
+    const pos_range pr(9,20);
+
+    // left-side:
+    BOOST_REQUIRE(! pr.is_range_intersect(pos_range(0,9)));  // ends exactly where pr begins: no overlap
+    BOOST_REQUIRE(  pr.is_range_intersect(pos_range(0,10)));  // shares position 9
+
+    // right side:
+    BOOST_REQUIRE(  pr.is_range_intersect(pos_range(19,30)));  // shares position 19
+    BOOST_REQUIRE(! pr.is_range_intersect(pos_range(20,30)));  // begins exactly where pr ends: no overlap
+
+    // superset:
+    BOOST_REQUIRE(  pr.is_range_intersect(pos_range(0,30)));
+
+    // subset:
+    BOOST_REQUIRE(  pr.is_range_intersect(pos_range(12,15)));
+}
+
+BOOST_AUTO_TEST_CASE( test_pos_range_is_superset_of )  // is_superset_of: true only when the argument lies entirely inside pr
+{
+    // this corresponds to zero-index range [9,19] :
+    const known_pos_range pr(9,20);  // note: known_pos_range here, while the arguments below are pos_range
+
+    // non subset tests:
+    BOOST_REQUIRE(! pr.is_superset_of(pos_range(7,8)));  // entirely to the left
+    BOOST_REQUIRE(! pr.is_superset_of(pos_range(8,10)));  // sticks out on the left
+    BOOST_REQUIRE(! pr.is_superset_of(pos_range(8,20)));  // sticks out on the left by one
+    BOOST_REQUIRE(! pr.is_superset_of(pos_range(9,21)));  // sticks out on the right by one
+    BOOST_REQUIRE(! pr.is_superset_of(pos_range(8,21)));  // sticks out on both sides
+    BOOST_REQUIRE(! pr.is_superset_of(pos_range(25,30)));  // entirely to the right
+
+    // subset tests:
+    BOOST_REQUIRE(pr.is_superset_of(pos_range(9,20)));  // identical range counts as a subset
+    BOOST_REQUIRE(pr.is_superset_of(pos_range(11,20)));  // shares the right edge
+    BOOST_REQUIRE(pr.is_superset_of(pos_range(9,17)));  // shares the left edge
+    BOOST_REQUIRE(pr.is_superset_of(pos_range(11,17)));  // strictly inside
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/prob_util_test.cpp b/src/c++/lib/blt_util/test/prob_util_test.cpp
new file mode 100644
index 0000000..8e734ad
--- /dev/null
+++ b/src/c++/lib/blt_util/test/prob_util_test.cpp
@@ -0,0 +1,151 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "prob_util.hh"
+
+BOOST_AUTO_TEST_SUITE( prob_util )
+
+BOOST_AUTO_TEST_CASE( test_softmax )  // round trip: softMaxTransform(softMaxInverseTransform(x)) must give back x
+{
+    {
+        static const double val(0.00001);  // small value, default range
+        const auto tval = softMaxInverseTransform(val);
+        const auto val2 = softMaxTransform(tval);
+
+        static const double eps = 0.00000001;
+        BOOST_REQUIRE_CLOSE(val2, val, eps);
+    }
+
+    {
+        static const double val(0.75);  // mid-range value, default range
+        const auto tval = softMaxInverseTransform(val);
+        const auto val2 = softMaxTransform(tval);
+
+        static const double eps = 0.00000001;
+        BOOST_REQUIRE_CLOSE(val2, val, eps);
+    }
+
+    {
+        static const double val(500);  // value inside an explicitly supplied [min,max] range
+        static const double min(-10);
+        static const double max(1000);
+        const auto tval = softMaxInverseTransform(val,min,max);
+        const auto val2 = softMaxTransform(tval,min,max);  // the same min/max are passed to both directions
+
+        static const double eps = 0.00000001;
+        BOOST_REQUIRE_CLOSE(val2, val, eps);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_softmax_edgecase )  // round trip for the values 0 and 1 with the default range
+{
+    {
+        static const double val(0.);  // presumably the lower edge of the default range -- confirm in prob_util.hh
+        const auto tval = softMaxInverseTransform(val);
+        const auto val2 = softMaxTransform(tval);
+
+        static const double eps = 0.00000001;
+        BOOST_REQUIRE_CLOSE(val2, val, eps);  // NOTE(review): expected value is 0 here, so a relative tolerance presumably demands an exact match -- confirm
+    }
+
+    {
+        static const double val(1.);  // presumably the upper edge of the default range -- confirm in prob_util.hh
+        const auto tval = softMaxInverseTransform(val);
+        const auto val2 = softMaxTransform(tval);
+
+        static const double eps = 0.00000001;
+        BOOST_REQUIRE_CLOSE(val2, val, eps);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_softmax_factor_scale )  // scale a probability by adding log(factor) in transformed space
+{
+    //
+    // test using softmax as a "safe" way to mult probs by interesting factors:
+    //
+
+    // positiveTest: a case where we can mult by a factor easily
+    {
+        static const double val(0.000001);
+        static const double factor(100);
+        static const double logFactor(std::log(factor));
+        auto tval = softMaxInverseTransform(val);
+        tval += logFactor;  // shift in transformed space instead of multiplying val directly
+        const auto val2 = softMaxTransform(tval);
+
+        static const double eps = 0.01;  // looser tolerance than the round-trip tests: the result is only approximately val*factor
+        BOOST_REQUIRE_CLOSE(val2, (val*factor), eps);
+    }
+
+    // test2: a case where we need a "soft" max limitation to hold
+    {
+        static const double val(0.0049);  // a plain multiply would give 0.49
+        static const double factor(100);
+        static const double logFactor(std::log(factor));
+        auto tval = softMaxInverseTransform(val);
+        tval += logFactor;
+        const auto val2 = softMaxTransform(tval);
+
+        static const double eps = 0.01;
+        BOOST_REQUIRE_CLOSE(val2, 0.329944, eps);  // 0.329944 ~= o/(1+o) with o = 100*0.0049/0.9951, i.e. consistent with scaling the odds -- confirm in prob_util.hh
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_softmax_range )  // round trip of the iterator-range transforms over a small vector
+{
+    static const std::vector<double> startVal = { 0.2 , 0.3 , 0.0001 };
+    auto val = startVal;  // working copy; startVal is kept for the comparison below
+    softMaxInverseRangeTransform(val.begin(), val.end());
+    softMaxRangeTransform(val.begin(), val.end());  // applied to the same range, so val is expected to return to startVal
+
+    static const double eps = 0.00000001;
+    for (unsigned index(0); index < startVal.size(); ++index)
+    {
+        BOOST_REQUIRE_CLOSE(val[index], startVal[index], eps);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_prob_comp )  // prob_comp must give an accurate complement where 1-p in float would not
+{
+    static const float eps = 0.00000001f;
+
+    // prob simulates a typical strong call posterior where we have strong
+    // evidence for state 0 and expect a high q-score:
+    const float prob[] = {0.99999999f,0.000000002f,0.000000003f,0.000000005f};
+    const unsigned psize(sizeof(prob)/sizeof(float));  // element count of prob
+
+    const float expect = 0.00000001f;  // equals the sum of prob[1..3]: 2e-9 + 3e-9 + 5e-9
+    BOOST_REQUIRE_CLOSE(prob_comp(prob,prob+psize,0), expect, eps);  // last argument 0 is presumably the index being complemented -- confirm in prob_util.hh
+
+    // uncomment this case to demo why prob_comp is used:
+#if 0
+    const float val1(1.-prob[0]);
+    BOOST_REQUIRE_CLOSE(val1, expect , eps);
+#endif
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/seq_util_test.cpp b/src/c++/lib/blt_util/test/seq_util_test.cpp
new file mode 100644
index 0000000..4415e88
--- /dev/null
+++ b/src/c++/lib/blt_util/test/seq_util_test.cpp
@@ -0,0 +1,62 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/seq_util.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_seq_util )
+
+
+BOOST_AUTO_TEST_CASE( test_seq_repeat )  // repeat-unit detection: the unit string and how many times it repeats
+{
+    std::string ru;  // output: repeat unit
+    unsigned count;  // output: repeat count; written by each call below before it is read
+
+    static const std::string test0="";
+    get_seq_repeat_unit(test0,ru,count);
+    BOOST_REQUIRE_EQUAL(ru, "");
+    BOOST_REQUIRE_EQUAL(count, 1u);  // empty input is reported as one copy of an empty unit
+
+    static const std::string test1="AAAA";
+    get_seq_repeat_unit(test1,ru,count);
+    BOOST_REQUIRE_EQUAL(ru, "A");
+    BOOST_REQUIRE_EQUAL(count, 4u);
+
+    static const std::string test2="ACAC";
+    get_seq_repeat_unit(test2,ru,count);
+    BOOST_REQUIRE_EQUAL(ru, "AC");
+    BOOST_REQUIRE_EQUAL(count, 2u);
+
+    static const std::string test3="TACAC";
+    get_seq_repeat_unit(test3,ru,count);
+    BOOST_REQUIRE_EQUAL(ru, "TACAC");  // no shorter unit tiles the whole string, so the whole string is the unit
+    BOOST_REQUIRE_EQUAL(count, 1u);
+
+    get_vcf_seq_repeat_unit(test3,ru,count);  // same input, VCF variant: the result matches "ACAC", presumably because the first base is skipped -- confirm in seq_util.hh
+    BOOST_REQUIRE_EQUAL(ru, "AC");
+    BOOST_REQUIRE_EQUAL(count, 2u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/set_util_test.cpp b/src/c++/lib/blt_util/test/set_util_test.cpp
new file mode 100644
index 0000000..cf3f578
--- /dev/null
+++ b/src/c++/lib/blt_util/test/set_util_test.cpp
@@ -0,0 +1,54 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#include "boost/test/unit_test.hpp"
+
+#include "blt_util/set_util.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_set_util )
+
+
+BOOST_AUTO_TEST_CASE( test_set_subtract )  // inplaceSetSubtract(A,B) must remove A's elements from B
+{
+    std::set<int> A;
+    A.insert(1);
+    A.insert(4);
+    A.insert(5);
+    A.insert(6);
+    A.insert(9);
+
+    std::set<int> B;
+    B.insert(2);
+    B.insert(5);
+    B.insert(6);
+    B.insert(7);
+    B.insert(10);
+
+    inplaceSetSubtract(A,B);  // the second argument is the one modified: the checks below inspect B
+
+    BOOST_REQUIRE_EQUAL(B.size(), 3u);  // 5 and 6 are shared with A, leaving {2,7,10}; unsigned literal matches size()'s unsigned type
+    BOOST_REQUIRE_EQUAL(*B.begin(), 2);  // smallest surviving element
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/stage_manager_test.cpp b/src/c++/lib/blt_util/test/stage_manager_test.cpp
new file mode 100644
index 0000000..7b81289
--- /dev/null
+++ b/src/c++/lib/blt_util/test/stage_manager_test.cpp
@@ -0,0 +1,177 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "stage_manager.hh"
+
+//#define DEBUG_SM_TEST
+
+#ifdef DEBUG_SM_TEST
+#include <iostream>
+
+namespace
+{
+std::ostream& log_os(std::cerr);  // debug trace sink, only compiled when DEBUG_SM_TEST is defined
+}
+#endif
+
+
+BOOST_AUTO_TEST_SUITE( test_stage_manager )
+
+
+/// \brief Build the stage layout shared by the tests in this file
+///
+/// The returned stage_data describes a tree rooted at stage 0 in which:
+///   - stage 1 follows 10 bases behind the root
+///   - stage 2 follows 20 bases behind stage 1
+///   - stage 3 follows 20 bases behind the root
+///
+static
+stage_data
+get_test_stage_data()
+{
+    stage_data layout;
+    layout.add_stage(0);       // root
+    layout.add_stage(1,0,10);  // (id, parent id, distance behind the parent)
+    layout.add_stage(2,1,20);
+    layout.add_stage(3,0,20);
+
+    return layout;
+}
+
+
+/// check that stage \p stage_id of \p sd reports a shift equal to \p expect
+static void
+stage_data_shift_test(const stage_data& sd,
+                      const int stage_id,
+                      const unsigned expect)
+{
+    const unsigned result = sd.get_stage_id_shift(stage_id);
+    BOOST_CHECK_EQUAL(result,expect);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_stage_data_dist )
+{
+    // expected shift (summed distance back to the root) of stages 0..3,
+    // indexed by stage id:
+    static const unsigned expectedShift[] = { 0, 10, 30, 20 };
+    const stage_data layout(get_test_stage_data());
+
+    for (int id(0); id<4; ++id) stage_data_shift_test(layout,id,expectedShift[id]);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_stage_data_bad_parent )
+{
+    stage_data sd;  // left empty: no stage 0 has been added
+    BOOST_CHECK_THROW(sd.add_stage(1,0,10),std::exception);  // so parent id 0 is unknown
+}
+
+
+BOOST_AUTO_TEST_CASE( test_stage_data_bad_id )
+{
+    stage_data sd;
+    sd.add_stage(1);
+    // only id 1 was added, so looking up either neighbour (0 or 2) must throw:
+    BOOST_CHECK_THROW(sd.get_stage_id_shift(0),std::exception);
+    BOOST_CHECK_NO_THROW(sd.get_stage_id_shift(1));
+    BOOST_CHECK_THROW(sd.get_stage_id_shift(2),std::exception);
+}
+
+
+/// \brief Minimal pos_processor object used to test stage manager
+///
+/// The object is itself part of the test infrastructure: each process_pos
+/// call checks that the position given for a stage exceeds the position
+/// last recorded for that stage.
+/// TODO: also check the expected relationship (stage-to-root distance vs
+///       expect) of all stages as process_pos is called
+struct test_pos_processor : public pos_processor_base
+{
+    typedef std::map<int,pos_t> spos_t;
+    //
+    // TODO: finish setting up stage relationship checking...
+    //
+    // pos_processor wouldn't normally need this info, but we use
+    // it to test expected stage position relationships
+    //
+    //test_pos_processor(const stage_data& sd, const pos_range& pr)
+    //
+
+    void
+    process_pos(const int stage_no,
+                const pos_t pos)
+    {
+#ifdef DEBUG_SM_TEST
+        log_os << "process_pos stage_no: " << stage_no << " pos: " << pos << "\n";
+#endif
+
+        // a stage which was seen before must now be at a strictly later position:
+        const spos_t::const_iterator i(stage_pos.find(stage_no));
+        if (i != stage_pos.end())
+        {
+            BOOST_CHECK(pos > (i->second));
+        }
+        stage_pos[stage_no] = pos;
+    }
+
+    /// most recent position passed to process_pos, keyed by stage number
+    spos_t stage_pos;
+};
+
+
+BOOST_AUTO_TEST_CASE( test_stage_manager )
+{
+    const stage_data stages(get_test_stage_data());
+    const pos_range reportRange(0,60);
+    test_pos_processor tpp;
+    stage_manager manager(stages,reportRange,tpp);
+
+    // feed position 40 to the manager ...
+    manager.handle_new_pos_value(40);
+    // ... each stage should then have been processed up to 40 minus its shift
+    BOOST_CHECK_EQUAL(tpp.stage_pos[0],40);
+    BOOST_CHECK_EQUAL(tpp.stage_pos[1],30);
+    BOOST_CHECK_EQUAL(tpp.stage_pos[2],10);
+    BOOST_CHECK_EQUAL(tpp.stage_pos[3],20);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_stage_manager_reset )
+{
+    const stage_data stages(get_test_stage_data());
+    const pos_range reportRange(0,60);
+    test_pos_processor tpp;
+    stage_manager manager(stages,reportRange,tpp);
+
+    // reset() with no positions fed in: every stage is expected to finish at
+    // 59 (presumably the last position inside the 0-60 report range)
+    manager.reset();
+    for (int i(0); i<4; ++i)
+    {
+        BOOST_CHECK_EQUAL(tpp.stage_pos[i],59);
+    }
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/stream_stat_test.cpp b/src/c++/lib/blt_util/test/stream_stat_test.cpp
new file mode 100644
index 0000000..dae9557
--- /dev/null
+++ b/src/c++/lib/blt_util/test/stream_stat_test.cpp
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "stream_stat.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_stream_stat )
+
+
+BOOST_AUTO_TEST_CASE( test_stream_stat )
+{
+    // BOOST_REQUIRE_CLOSE takes its tolerance as a percentage
+    static const double eps(0.00001);
+    static const double samples[] = { 3., 4., 5. };
+
+    stream_stat ss;
+    for (unsigned sampleIndex(0); sampleIndex<3; ++sampleIndex) ss.add(samples[sampleIndex]);
+
+    // for {3,4,5}: mean 4, and sd 1 (the n-1 normalized standard deviation)
+    BOOST_REQUIRE_EQUAL(ss.size(),3);
+    BOOST_REQUIRE_CLOSE(ss.min(), 3., eps);
+    BOOST_REQUIRE_CLOSE(ss.max(), 5., eps);
+    BOOST_REQUIRE_CLOSE(ss.mean(), 4., eps);
+    BOOST_REQUIRE_CLOSE(ss.sd(), 1., eps);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/test/string_util_test.cpp b/src/c++/lib/blt_util/test/string_util_test.cpp
new file mode 100644
index 0000000..c3c3867
--- /dev/null
+++ b/src/c++/lib/blt_util/test/string_util_test.cpp
@@ -0,0 +1,90 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "string_util.hh"
+
+#include <cstring>
+
+
+BOOST_AUTO_TEST_SUITE( string_util )
+
+static const char* test_string("234562342");  // shared input; its '2'-delimited tokens are "", "3456", "34", ""
+
+template <typename T>
+void
+test_split_string_bytype(T test)
+{
+    std::vector<std::string> result;
+    // run the same split_string checks for whichever string type T is passed in
+    split_string(test,'2',result);  // delimiters at both ends give empty edge tokens
+    BOOST_REQUIRE_EQUAL(static_cast<int>(result.size()), 4);
+    BOOST_REQUIRE_EQUAL(result[0], "");
+    BOOST_REQUIRE_EQUAL(result[1], "3456");
+    BOOST_REQUIRE_EQUAL(result[3], "");
+    // fourth argument true: only 2 tokens come back (presumably empty tokens are dropped)
+    split_string(test,'2',result, true);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(result.size()), 2);
+    BOOST_REQUIRE_EQUAL(result[0], "3456");
+    // delimiter not present: the whole input comes back as a single token
+    split_string(test,'X',result);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(result.size()), 1);
+    BOOST_REQUIRE_EQUAL(result[0], test);
+    // empty input still yields one (empty) token
+    split_string("",'X',result);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(result.size()), 1);
+    BOOST_REQUIRE_EQUAL(result[0], "");
+}
+
+BOOST_AUTO_TEST_CASE( test_split_string_cstr )
+{
+    test_split_string_bytype<const char*>(test_string);  // C-string flavour of the shared checks
+}
+
+BOOST_AUTO_TEST_CASE( test_split_string )
+{
+    // std::string flavour of the shared checks (T is deduced as std::string)
+    test_split_string_bytype(std::string(test_string));
+}
+
+BOOST_AUTO_TEST_CASE( test_destructive_split_string )
+{
+    // destructive_split_string takes a mutable buffer, so give it a private,
+    // nul-terminated copy of the shared test string (the +1 copies the nul)
+    std::vector<char> tmp(test_string, test_string+std::strlen(test_string)+1);
+    std::vector<const char*> result;
+    destructive_split_string(tmp.data(),'2',result);
+    BOOST_REQUIRE_EQUAL(static_cast<int>(result.size()), 4);
+    BOOST_REQUIRE_EQUAL(std::strcmp(result[0], ""),0);
+    BOOST_REQUIRE_EQUAL(std::strcmp(result[1], "3456"),0);
+    BOOST_REQUIRE_EQUAL(std::strcmp(result[3], ""),0);
+}
+
+BOOST_AUTO_TEST_CASE( test_split_match )
+{
+    const std::string test(test_string);
+    // "34" is one of the '2'-delimited tokens of the test string, "XX" is not
+    BOOST_REQUIRE(split_match(test,'2',"34"));
+    BOOST_REQUIRE(! split_match(test,'2',"XX"));
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/blt_util/test/test_main.cpp b/src/c++/lib/blt_util/test/test_main.cpp
new file mode 100644
index 0000000..312e5ab
--- /dev/null
+++ b/src/c++/lib/blt_util/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libblt_util
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/c++/lib/blt_util/test/window_util_test.cpp b/src/c++/lib/blt_util/test/window_util_test.cpp
new file mode 100644
index 0000000..0fec4a9
--- /dev/null
+++ b/src/c++/lib/blt_util/test/window_util_test.cpp
@@ -0,0 +1,73 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "window_util.hh"
+
+#include <iostream>
+
+
+BOOST_AUTO_TEST_SUITE( test_window )
+
+
+BOOST_AUTO_TEST_CASE( test_window )
+{
+    static const double tol(0.0001);  // BOOST_REQUIRE_CLOSE tolerance, given as a percentage
+    // fill a 3-slot window; the 4th insert then displaces the oldest value (0)
+    window_average wa(3);
+    wa.insert(0);
+    BOOST_REQUIRE_EQUAL(wa.size(),1);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 0., tol);
+    wa.insert(1);
+    BOOST_REQUIRE_EQUAL(wa.size(),2);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 0.5, tol);
+    wa.insert(2);
+    BOOST_REQUIRE_EQUAL(wa.size(),3);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 1., tol);
+    wa.insert(3);
+    BOOST_REQUIRE_EQUAL(wa.size(),3);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 2., tol);
+    // each N/A insert pushes out the oldest remaining value (1, then 2, then 3)
+    wa.insert_null();
+    BOOST_REQUIRE_EQUAL(wa.size(),2);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 2.5, tol);
+    wa.insert_null();
+    BOOST_REQUIRE_EQUAL(wa.size(),1);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 3., tol);
+    wa.insert_null();
+    BOOST_REQUIRE_EQUAL(wa.size(),0);
+    // data values now displace the N/A entries: same results as the first pass
+    wa.insert(0);
+    BOOST_REQUIRE_EQUAL(wa.size(),1);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 0., tol);
+    wa.insert(1);
+    BOOST_REQUIRE_EQUAL(wa.size(),2);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 0.5, tol);
+    wa.insert(2);
+    BOOST_REQUIRE_EQUAL(wa.size(),3);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 1., tol);
+    wa.insert(3);
+    BOOST_REQUIRE_EQUAL(wa.size(),3);
+    BOOST_REQUIRE_CLOSE(wa.avg(), 2., tol);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/blt_util/thirdparty_pop.h b/src/c++/lib/blt_util/thirdparty_pop.h
new file mode 100644
index 0000000..aba4f51
--- /dev/null
+++ b/src/c++/lib/blt_util/thirdparty_pop.h
@@ -0,0 +1,28 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+// special header to 'wrap' third-party libraries with common alt compiler settings
+
+#ifdef _MSC_VER
+
+#pragma warning( pop )
+
+#endif
+
diff --git a/src/c++/lib/blt_util/thirdparty_push.h b/src/c++/lib/blt_util/thirdparty_push.h
new file mode 100644
index 0000000..49e374a
--- /dev/null
+++ b/src/c++/lib/blt_util/thirdparty_push.h
@@ -0,0 +1,30 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+// special header to 'wrap' third-party libraries with common alt compiler settings
+
+#ifdef _MSC_VER
+
+#include <codeanalysis\warnings.h>
+#pragma warning( push, 0 )
+#pragma warning ( disable : ALL_CODE_ANALYSIS_WARNINGS )
+
+#endif
+
diff --git a/src/c++/lib/blt_util/time_util.cpp b/src/c++/lib/blt_util/time_util.cpp
new file mode 100644
index 0000000..96f0969
--- /dev/null
+++ b/src/c++/lib/blt_util/time_util.cpp
@@ -0,0 +1,52 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "io_util.hh"
+#include "time_util.hh"
+
+#include <iomanip>
+#include <iostream>
+
+
+
+/// write "<wall> wall, <user> user + <system> system = <total> CPU (<pct>%)" to os
+void CpuTimes::
+report(
+    const double factor,  // multiplier applied to the stored wall/user/system values
+    const char* tlabel,   // nul-terminated unit label written after each value
+    std::ostream& os) const
+{
+    StreamScoper scoper(os);  // presumably restores the formatting state of os on scope exit
+    os << std::fixed << std::setprecision(4);
+    const double fwall(wall*factor);
+    const double fuser(user*factor);
+    const double fsystem(system*factor);
+    const double total(fuser+fsystem);
+    // without any recorded wall time the ratio is undefined: print 0% rather than nan/inf
+    const double perc((fwall > 0.) ? (100*total/fwall) : 0.);
+    os << fwall << tlabel << " wall, "
+       << fuser << tlabel << " user + "
+       << fsystem << tlabel << " system = "
+       << total << tlabel << " CPU (" << std::setprecision(2) << perc << "%)";
+}
diff --git a/src/c++/lib/blt_util/time_util.hh b/src/c++/lib/blt_util/time_util.hh
new file mode 100644
index 0000000..8604774
--- /dev/null
+++ b/src/c++/lib/blt_util/time_util.hh
@@ -0,0 +1,202 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "boost/chrono.hpp"
+#include "boost/serialization/nvp.hpp"
+#include "boost/timer/timer.hpp"
+#include "boost/utility.hpp"
+
+#include <iosfwd>
+
+
+namespace BOOST_TIMER_HELPER
+{
+/// convert a nanosecond count to seconds at (truncated) microsecond resolution
+inline double
+getTimerSeconds(const boost::timer::nanosecond_type& ns)
+{
+    namespace bc = boost::chrono;
+    const bc::microseconds wholeMicroseconds(bc::duration_cast<bc::microseconds>(bc::nanoseconds(ns)));
+    return static_cast<double>(wholeMicroseconds.count())/1000000.;
+}
+}
+
+/// Replacement for boost::timer cpu_times which holds wall/user/system time
+/// in seconds and adds merge, difference, serialization and reporting.
+struct CpuTimes
+{
+    CpuTimes() {}
+
+    /// convert the nanosecond counts of a boost cpu_times record to seconds
+    explicit
+    CpuTimes(const boost::timer::cpu_times& t)
+        : wall(BOOST_TIMER_HELPER::getTimerSeconds(t.wall)),
+          user(BOOST_TIMER_HELPER::getTimerSeconds(t.user)),
+          system(BOOST_TIMER_HELPER::getTimerSeconds(t.system))
+    {}
+
+    /// add each time in \p rhs to the matching time of this object
+    void
+    merge(const CpuTimes& rhs)
+    {
+        wall += rhs.wall;
+        user += rhs.user;
+        system += rhs.system;
+    }
+
+    /// subtract each time in \p rhs from the matching time of this object
+    void
+    difference(const CpuTimes& rhs)
+    {
+        wall -= rhs.wall;
+        user -= rhs.user;
+        system -= rhs.system;
+    }
+
+    /// boost::serialization hook covering all three times (version is ignored)
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /*version*/)
+    {
+        ar& BOOST_SERIALIZATION_NVP(wall)
+        & BOOST_SERIALIZATION_NVP(user)
+        & BOOST_SERIALIZATION_NVP(system)
+        ;
+    }
+
+    /// write the times to \p os in seconds, each value labeled "s"
+    void
+    reportSec(std::ostream& os) const
+    {
+        static const char tlabel[] = "s";  // must be nul-terminated: report() streams it as a C-string
+        static const double factor(1.);
+        report(factor,tlabel,os);
+    }
+
+    /// write the times to \p os in hours, each value labeled "h"
+    void
+    reportHr(std::ostream& os) const
+    {
+        static const char tlabel[] = "h";  // must be nul-terminated: report() streams it as a C-string
+        static const double factor(1./3600.);
+        report(factor,tlabel,os);
+    }
+
+    /// write the times scaled by \p factor to \p os, following each value
+    /// with the nul-terminated label \p tlabel
+    void
+    report(const double factor, const char* tlabel, std::ostream& os) const;
+
+    double wall = 0.;    ///< wall clock time in seconds
+    double user = 0.;    ///< user CPU time in seconds
+    double system = 0.;  ///< system CPU time in seconds
+};
+
+BOOST_CLASS_IMPLEMENTATION(CpuTimes, boost::serialization::object_serializable)
+
+
+
+/// stopwatch accumulating CPU/wall time over repeated resume()/stop() cycles
+struct TimeTracker
+{
+    TimeTracker()
+    {
+        _timer.stop();  // keep the freshly constructed timer idle until resume()
+    }
+
+    /// forget all accumulated time; the next resume() restarts the clock
+    void clear()
+    {
+        _isReset = true;
+    }
+
+    /// start the clock; earlier time is kept unless clear() preceded this call
+    void resume()
+    {
+        //assert((! _isStart) && "clock is running");
+        if (! _isReset)
+        {
+            _timer.resume();
+            return;
+        }
+        _timer.start();
+        _isReset = false;
+    }
+
+    /// stop the clock; the time accumulated so far stays available through
+    /// getTimes()
+    void stop()
+    {
+        _timer.stop();
+    }
+
+    /// accumulated times in seconds
+    CpuTimes getTimes() const
+    {
+        // nothing has been timed since construction or the last clear()
+        if (_isReset) return CpuTimes();
+        return CpuTimes(_timer.elapsed());
+    }
+
+    /// DEPRECATED accessor for the user cpu time in seconds
+    ///
+    /// the timer must be stopped before this is called
+    double getUserSeconds() const
+    {
+        const CpuTimes times(getTimes());
+        return times.user;
+    }
+
+    /// DEPRECATED accessor for the wall clock time in seconds
+    ///
+    /// the timer must be stopped before this is called
+    double getWallSeconds() const
+    {
+        const CpuTimes times(getTimes());
+        return times.wall;
+    }
+
+private:
+    bool _isReset = true;  ///< true until the first resume() after construction or clear()
+    boost::timer::cpu_timer _timer;
+};
+
+
+/// scope guard for TimeTracker: resumes the tracker on construction and
+/// stops it again when the guard is destroyed
+struct TimeScoper : private boost::noncopyable
+{
+    explicit TimeScoper(TimeTracker& t) : _t(t)
+    {
+        t.resume();
+    }
+
+    ~TimeScoper()
+    {
+        _t.stop();
+    }
+private:
+    TimeTracker& _t;  ///< tracker driven by this guard; it has to outlive the guard
+};
diff --git a/src/c++/lib/blt_util/window_util.hh b/src/c++/lib/blt_util/window_util.hh
new file mode 100644
index 0000000..1e260d2
--- /dev/null
+++ b/src/c++/lib/blt_util/window_util.hh
@@ -0,0 +1,128 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <iosfwd>
+#include <vector>
+
+
+// sliding-window average: keeps the mean of the most recently inserted
+// values; slots holding N/A ("null") entries are left out of the mean
+struct window_average
+{
+    window_average(const int32_t init_full_size) :
+        _total(0),
+        _size(0),
+        _null_size(0),
+        _head(0),
+        _full_size(init_full_size),
+        _buf(init_full_size),
+        _is_buf(init_full_size)
+    {
+        assert(init_full_size>0);  // the window needs at least one slot
+    }
+
+    // forget every inserted entry; the capacity is left unchanged
+    void reset()
+    {
+        _total=0;
+        _size=0;
+        _null_size=0;
+        _head=0;
+    }
+
+    // insert a data value, displacing the oldest entry once the window is full
+    void insert(const int32_t x)
+    {
+        pre_insert();
+        _total += x;
+        store(x,true);
+    }
+
+    // inserts an N/A value: it occupies a slot but is not counted by size()
+    void insert_null()
+    {
+        pre_insert();
+        _null_size++;
+        store(0,false);
+    }
+
+    // capacity of the window in slots, as passed to the constructor
+    uint32_t full_size() const
+    {
+        return _full_size;
+    }
+
+    // actual number of data points currently in the window, this can be
+    // less than full_size() due to initialization or N/A values
+    uint32_t size() const
+    {
+        const uint32_t dataCount(_size-_null_size);
+        return dataCount;
+    }
+
+    // mean of the data points currently in the window; not meaningful while
+    // size() is 0, since the sum is then divided by zero
+    float avg() const
+    {
+        const float sum(static_cast<float>(_total));
+        const float count(static_cast<float>(size()));
+        return (sum/count);
+    }
+
+private:
+    // make room for one entry: grow the window while it is still filling,
+    // otherwise retire the entry in the slot that is about to be overwritten
+    void pre_insert()
+    {
+        if (_size!=_full_size)
+        {
+            _size++;
+            return;
+        }
+
+        // full window: undo the contribution of the entry stored at _head
+        if (_is_buf[_head]) _total -= _buf[_head];
+        else                _null_size--;
+    }
+
+    // write one entry into the slot at _head, then advance _head circularly
+    void store(const int32_t value, const bool isData)
+    {
+        _buf[_head] = value;
+        _is_buf[_head] = isData;
+        _head=(_head+1)%_full_size;
+    }
+
+    int64_t _total;             // sum of the data values in the window
+    uint32_t _size;             // occupied slots, data and N/A alike
+    uint32_t _null_size;        // occupied slots holding N/A
+    uint32_t _head;             // slot the next insert writes to
+    uint32_t _full_size;        // capacity in slots
+    std::vector<int32_t> _buf;  // slot values (0 for N/A)
+    std::vector<bool> _is_buf;  // true where the slot holds a data value
+};
diff --git a/src/c++/lib/common/CMakeLists.txt b/src/c++/lib/common/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/common/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/common/Exceptions.cpp b/src/c++/lib/common/Exceptions.cpp
new file mode 100644
index 0000000..bbab286
--- /dev/null
+++ b/src/c++/lib/common/Exceptions.cpp
@@ -0,0 +1,116 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/**
+ ** \brief Implementation of the common exception mechanism.
+ **
+ ** \author Come Raczy
+ **/
+
+#include <cstring>
+#include <cerrno>
+#include <boost/date_time.hpp>
+
+#include "common/Exceptions.hh"
+
+namespace illumina
+{
+namespace common
+{
+
+// data carried by every exception type declared in Exceptions.hh: an
+// errno-style error number plus a free-text message
+ExceptionData::ExceptionData(int errorNumber, const std::string& message) :
+    boost::exception(),
+    errorNumber_(errorNumber),
+    message_(message)
+{}
+
+// returns "<local time>: <strerror text of the error number>: <boost
+// diagnostic information of this exception>"
+std::string ExceptionData::getContext() const
+{
+    namespace pt = boost::posix_time;
+    const std::string now(pt::to_simple_string(pt::second_clock::local_time()));
+    const std::string reason(strerror(errorNumber_));
+    return now + ": " + reason + ": " + boost::diagnostic_information(*this);
+}
+
+// I/O failure; the error number is chosen by the caller
+IoException::IoException(int errorNumber, const std::string& message) :
+    std::ios_base::failure(message),
+    ExceptionData(errorNumber, message)
+{}
+
+// resource failure; the error number is chosen by the caller
+ResourceException::ResourceException(int errorNumber, const std::string& message) :
+    ExceptionData(errorNumber, message)
+{}
+
+// allocation failure, always tagged ENOMEM
+MemoryException::MemoryException(const std::string& message) :
+    std::bad_alloc(),
+    ExceptionData(ENOMEM, message)
+{}
+
+// the std::logic_error based exceptions which follow are all tagged EINVAL
+UnsupportedVersionException::UnsupportedVersionException(const std::string& message) :
+    std::logic_error(message),
+    ExceptionData(EINVAL, message)
+{}
+
+FeatureNotAvailable::FeatureNotAvailable(const std::string& message) :
+    std::logic_error(message),
+    ExceptionData(EINVAL, message)
+{}
+
+InvalidParameterException::InvalidParameterException(const std::string& message) :
+    std::logic_error(message),
+    ExceptionData(EINVAL, message)
+{}
+
+InvalidOptionException::InvalidOptionException(const std::string& message) :
+    std::logic_error(message),
+    ExceptionData(EINVAL, message)
+{}
+
+PreConditionException::PreConditionException(const std::string& message) :
+    std::logic_error(message),
+    ExceptionData(EINVAL, message)
+{}
+
+PostConditionException::PostConditionException(const std::string& message) :
+    std::logic_error(message),
+    ExceptionData(EINVAL, message)
+{}
+
+// std::out_of_range based; only the what() text gets the class-name prefix
+OutOfBoundsException::OutOfBoundsException(const std::string& message) :
+    std::out_of_range("OutOfBoundsException: " + message),
+    ExceptionData(EINVAL, message)
+{}
+
+// VCF failures are IoExceptions tagged EPROTO, with a "VCF failure: " prefix
+VcfException::VcfException(const std::string& message) :
+    IoException(EPROTO, std::string("VCF failure: ") + message)
+{}
+
+} // namespace common
+} // namespace illumina
diff --git a/src/c++/lib/common/Exceptions.hh b/src/c++/lib/common/Exceptions.hh
new file mode 100644
index 0000000..9d8fe1e
--- /dev/null
+++ b/src/c++/lib/common/Exceptions.hh
@@ -0,0 +1,230 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/**
+ ** \brief Declaration of the common exception mechanism.
+ **
+ ** All exceptions must carry the same data (independently of the
+ ** exception type) to homogenize the reporting and processing of
+ ** errors.
+ **
+ ** \author Come Raczy
+ **/
+
+#pragma once
+
+
+#include "blt_util/thirdparty_push.h"
+
+#include "boost/cerrno.hpp"
+#include "boost/lexical_cast.hpp"
+#include "boost/exception/all.hpp"
+#include "boost/throw_exception.hpp"
+
+#include "blt_util/thirdparty_pop.h"
+
+#include <ios>
+#include <stdexcept>
+#include <string>
+
+namespace illumina
+{
+namespace common
+{
+
+/// this type can be used to add more info onto an in-flight exception:
+///
+typedef boost::error_info<struct extra_exception_message,std::string> ExceptionMsg;
+
+
+/**
+ ** \brief Common base class to all the exception classes
+ **
+ ** Use BOOST_THROW_EXCEPTION to get the context info (file, function, line)
+ ** at the throw site.
+ **/
+class ExceptionData : public boost::exception
+{
+public:
+    ExceptionData(int errorNumber=0, const std::string& message="");
+    ExceptionData(const ExceptionData&) = default;
+    ExceptionData& operator=(const ExceptionData&) = delete;   // not assignable: both data members are const
+
+    int getErrorNumber() const
+    {
+        return errorNumber_;
+    }
+    const std::string& getMessage() const
+    {
+        return message_;
+    }
+    std::string getContext() const;
+private:
+    const int errorNumber_;       // errno-style code, the derived classes pass values such as EINVAL or ENOMEM
+    const std::string message_;
+};
+
+/// Generic exception: std::exception plus the common ExceptionData payload (copyable, explicitly not assignable)
+class IlluminaException : public std::exception, public ExceptionData
+{
+public:
+    IlluminaException(int errorNumber, const std::string& message) : ExceptionData(errorNumber, message) {}
+    IlluminaException(const IlluminaException& e) : std::exception(e), ExceptionData(e) {}
+    IlluminaException& operator=(const IlluminaException&) = delete;
+};
+
+/**
+ * \brief Exception thrown when there are problems with the IO operations
+ */
+class IoException: public std::ios_base::failure, public ExceptionData
+{
+public:
+    IoException(int errorNumber, const std::string& message);
+};
+
+/**
+ * \brief Exception thrown when there are insufficient resources to perform an operation. For example,
+ *        if adjusting the soft ulimit fails due to a set hard limit
+ */
+class ResourceException: public std::exception, public ExceptionData
+{
+public:
+    ResourceException(int errorNumber, const std::string& message);
+};
+
+/**
+ * \brief Same as bad_alloc but with a message
+ */
+class MemoryException: public std::bad_alloc, public ExceptionData
+{
+public:
+    explicit
+    MemoryException(const std::string& message);
+};
+
+/**
+ ** \brief Exception thrown when the client supplied an unsupported version number.
+ **
+ ** Particularly relevant to data format and software versions
+ ** (Pipeline, IPAR, Phoenix, etc.). It should not be used in
+ ** situations where the client didn't have the possibility to check
+ ** the version (for instance when reading the version of a data
+ ** format from the header of a file).
+ **
+ **/
+class UnsupportedVersionException: public std::logic_error, public ExceptionData
+{
+public:
+    explicit
+    UnsupportedVersionException(const std::string& message);
+};
+
+/**
+ ** \brief Thrown when the requested functionality is not available.
+ **
+ **/
+class FeatureNotAvailable: public std::logic_error, public ExceptionData
+{
+public:
+    explicit
+    FeatureNotAvailable(const std::string& message);
+};
+
+/**
+ ** \brief Exception thrown when the client supplied an invalid parameter.
+ **
+ **/
+class InvalidParameterException: public std::logic_error, public ExceptionData
+{
+public:
+    explicit
+    InvalidParameterException(const std::string& message);
+};
+
+/**
+ ** \brief Exception thrown when an invalid command line option was detected.
+ **
+ **/
+class InvalidOptionException: public std::logic_error, public ExceptionData
+{
+public:
+    explicit
+    InvalidOptionException(const std::string& message);
+};
+
+/**
+ ** \brief Exception thrown when a method invocation violates the pre-conditions.
+ **
+ **/
+class PreConditionException: public std::logic_error, public ExceptionData
+{
+public:
+    explicit
+    PreConditionException(const std::string& message);
+};
+
+/**
+ ** \brief Exception thrown when a method invocation violates the post-conditions.
+ **
+ **/
+class PostConditionException: public std::logic_error, public ExceptionData
+{
+public:
+    explicit
+    PostConditionException(const std::string& message);
+};
+
+/**
+ ** \brief Exception thrown when a method invocation generates an out-of-limits situation.
+ **
+ **/
+class OutOfBoundsException: public std::out_of_range, public ExceptionData
+{
+public:
+    explicit
+    OutOfBoundsException(const std::string& message);
+};
+
+/// General purpose exception for all other cases:
+///
+struct LogicException: public std::logic_error, public ExceptionData
+{
+    explicit
+    LogicException(const std::string& message) :
+        std::logic_error(message),
+        ExceptionData(EPERM, message)
+    {}
+};
+
+
+/**
+ ** \brief Exception thrown when a VCF violation is encountered.
+ **
+ **/
+class VcfException: public IoException
+{
+public:
+    explicit
+    VcfException(const std::string& message);
+};
+
+
+}
+}
diff --git a/src/c++/lib/common/OutStream.cpp b/src/c++/lib/common/OutStream.cpp
new file mode 100644
index 0000000..60fa5a7
--- /dev/null
+++ b/src/c++/lib/common/OutStream.cpp
@@ -0,0 +1,78 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "common/OutStream.hh"
+
+#include "common/Exceptions.hh"
+
+#include <fstream>
+#include <iostream>
+
+
+OutStream::
+OutStream(const std::string& fileName) :
+    _isInit(false),
+    _fileName(fileName),
+    _osptr(&std::cout),            // std::cout stays the target when no file name is given
+    _ofsptr(new std::ofstream)     // left closed here, initStream() opens it
+{
+    if (! _fileName.empty())
+    {
+        std::ofstream test;        // throwaway stream: an unopenable file is reported at construction time, not on first use
+        openFile(_fileName,test);
+    }
+}
+
+
+
+// required for unique_ptr:
+OutStream::
+~OutStream() {}
+
+
+
+void
+OutStream::
+initStream()   // called by getStream() for as long as _isInit is false
+{
+    if (! _fileName.empty())
+    {
+        openFile(_fileName,*_ofsptr);   // throws if the file can't be opened, _isInit then stays false
+        _osptr=_ofsptr.get();           // switch the target from std::cout to the file stream
+    }
+    _isInit=true;
+}
+
+/// open 'filename' on 'ofs', a stream which is not usable afterwards is reported as a LogicException
+void
+OutStream::
+openFile(const std::string& filename, std::ofstream& ofs)
+{
+    ofs.open(filename.c_str());
+    if (! ofs)
+    {
+        const std::string msg("ERROR: Can't open output file: " + filename + "\n");
+        BOOST_THROW_EXCEPTION(illumina::common::LogicException(msg));
+    }
+}
diff --git a/src/c++/lib/common/OutStream.hh b/src/c++/lib/common/OutStream.hh
new file mode 100644
index 0000000..35a0018
--- /dev/null
+++ b/src/c++/lib/common/OutStream.hh
@@ -0,0 +1,63 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <iosfwd>
+#include <memory>
+#include <string>
+
+
+
+/// provide an output stream which is either the named file or, when the
+/// file name is empty, std::cout
+struct OutStream
+{
+    OutStream(const std::string& fileName);
+
+    ~OutStream();
+
+    std::ostream&
+    getStream()
+    {
+        if (! _isInit) initStream();   // the stream is set up on first use
+        return *_osptr;
+    }
+
+private:
+
+    void
+    initStream();
+
+    static
+    void
+    openFile(
+        const std::string& filename,
+        std::ofstream& ofs);
+
+    bool _isInit;                             // true once initStream() has completed
+    std::string _fileName;                    // empty selects std::cout
+    std::ostream* _osptr;                     // non-owning, points at std::cout or at *_ofsptr
+    std::unique_ptr<std::ofstream> _ofsptr;   // owns the file stream
+};
diff --git a/src/c++/lib/common/Program.cpp b/src/c++/lib/common/Program.cpp
new file mode 100644
index 0000000..e4e18c2
--- /dev/null
+++ b/src/c++/lib/common/Program.cpp
@@ -0,0 +1,150 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#include "Program.hh"
+#include "ProgramConfig.hh"
+#include "Exceptions.hh"
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "blt_util/sig_handler.hh"
+
+#include <cstdlib>
+
+#include <iostream>
+
+
+
+/// write all program arguments to 'os' as one "cmdline:" line
+static void
+dump_cl(int argc, char* argv[], std::ostream& os)
+{
+    os << "cmdline:";
+    int argIndex(0);
+    while (argIndex < argc)
+    {
+        os << ' ' << argv[argIndex];
+        ++argIndex;
+    }
+    os << "\n";
+}
+
+
+namespace illumina
+{
+
+
+const char*
+Program::
+version() const
+{
+    return getVersion();
+}
+
+/// return "<compiler name>-<compiler version>" as configured at build time
+const char*
+Program::
+compiler() const
+{
+    // assembled once on first use; the static string outlives every caller, so
+    // returning a pointer into it is safe
+    static const std::string compilerLabel(
+        std::string(cxxCompilerName()) + "-" + compilerVersion());
+    return compilerLabel.c_str();
+}
+
+const char*
+Program::
+buildTime() const
+{
+    return getBuildTime();
+}
+
+
+/// report the command line and build details on 'os' after a fatal exception, then
+/// terminate the process with EXIT_FAILURE (this function does not return)
+void
+Program::
+post_catch(int argc, char* argv[], std::ostream& os) const
+{
+    os << "...caught in program.run()\n";
+    // the command line goes to the same stream as the rest of the report:
+    dump_cl(argc,argv,os);
+    os << "version:\t" << version() << "\n";
+    os << "buildTime:\t" << buildTime() << "\n";
+    os << "compiler:\t" << compiler() << "\n";
+    os << std::flush;
+    exit(EXIT_FAILURE);
+}
+
+
+int
+Program::
+run(int argc, char* argv[]) const   // returns EXIT_SUCCESS, every handler below ends in post_catch() which exits the process
+{
+    try
+    {
+        std::ios_base::sync_with_stdio(false);
+        // rebuild the space-separated command line, it is handed to initialize_blt_signals() below:
+        std::string cmdline;
+        for (int i(0); i<argc; ++i)
+        {
+            if (i) cmdline += ' ';
+            cmdline += argv[i];
+        }
+
+        initialize_blt_signals(name(),cmdline.c_str());
+        // the program itself is implemented by the derived class:
+        runInternal(argc,argv);
+    }
+    catch (const blt_exception& e)
+    {
+        log_os << "FATAL_ERROR: " << name() << " EXCEPTION: " << e.what() << "\n";
+        post_catch(argc,argv,log_os);
+    }
+    catch (const illumina::common::ExceptionData& e)   // listed before boost::exception, which is its base class
+    {
+        log_os << "FATAL_ERROR: " << name() << " EXCEPTION: "
+               << e.getContext() << ": " << e.getMessage() << "\n";
+        post_catch(argc,argv,log_os);
+    }
+    catch (const boost::exception& e)
+    {
+        log_os << "FATAL_ERROR: " << name() << " EXCEPTION: "
+               << boost::diagnostic_information(e) << "\n";
+        post_catch(argc,argv,log_os);
+    }
+    catch (const std::exception& e)
+    {
+        log_os << "FATAL_ERROR: EXCEPTION: " << e.what() << "\n";
+        post_catch(argc,argv,log_os);
+    }
+    catch (...)
+    {
+        log_os << "FATAL_ERROR: UNKNOWN EXCEPTION\n";
+        post_catch(argc,argv,log_os);
+    }
+    return EXIT_SUCCESS;   // only reached when no exception was caught above
+}
+
+}
diff --git a/src/c++/lib/common/Program.hh b/src/c++/lib/common/Program.hh
new file mode 100644
index 0000000..890d083
--- /dev/null
+++ b/src/c++/lib/common/Program.hh
@@ -0,0 +1,68 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <iosfwd>
+
+namespace illumina
+{
+
+/// base-class for all command-line programs
+///
+/// this is used to standardize bottom-level exception handling
+struct Program
+{
+    virtual
+    ~Program() {}
+
+    int
+    run(int argc, char* argv[]) const;   // runs runInternal() inside the shared exception handlers
+
+    virtual
+    const char*
+    name() const = 0;                    // program name, used in the error and usage messages
+
+    const char*
+    version() const;                     // forwards to getVersion()
+
+    const char*
+    compiler() const;                    // "<compiler name>-<compiler version>"
+
+    const char*
+    buildTime() const;                   // forwards to getBuildTime()
+
+protected:
+    virtual
+    void
+    runInternal(int argc, char* argv[]) const = 0;   // the program itself, supplied by the derived class
+
+private:
+    void
+    post_catch(
+        int argc,
+        char* argv[],
+        std::ostream& os) const;         // reports build details after a caught exception, then exits with EXIT_FAILURE
+};
+
+}
diff --git a/src/c++/lib/common/ProgramConfig.hh b/src/c++/lib/common/ProgramConfig.hh
new file mode 100644
index 0000000..31c2954
--- /dev/null
+++ b/src/c++/lib/common/ProgramConfig.hh
@@ -0,0 +1,58 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \brief provide access to cmake project version numbers
+
+#pragma once
+
+#include "common/config.h"
+
+namespace illumina
+{
+
+inline
+const char*
+getVersion()
+{
+    return WORKFLOW_VERSION;
+}
+
+inline
+const char*
+getBuildTime()
+{
+    return BUILD_TIME;
+}
+
+inline
+const char*
+cxxCompilerName()
+{
+    return CXX_COMPILER_NAME;
+}
+
+inline
+const char*
+compilerVersion()
+{
+    return COMPILER_VERSION;
+}
+
+}
diff --git a/src/c++/lib/common/ProgramUtil.cpp b/src/c++/lib/common/ProgramUtil.cpp
new file mode 100644
index 0000000..c8ad14e
--- /dev/null
+++ b/src/c++/lib/common/ProgramUtil.cpp
@@ -0,0 +1,51 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#include "ProgramUtil.hh"
+
+#include <iostream>
+
+
+/// print the help text for 'prog' to 'os', then terminate the process with exit status 2
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,   // options listed in the help text
+    const char* desc,                                              // printed after the program name
+    const char* afteropts,                                         // appended right after "[options]" on the usage line
+    const char* msg)                                               // optional extra message, may be nullptr
+{
+    os << "\n" << prog.name() << ": " << desc << "\n\n";
+    os << "version: " << prog.version() << "\n";
+    os << "compiler: " << prog.compiler() << "\n";
+    os << "build-time: " << prog.buildTime() << "\n\n";
+    os << "usage: " << prog.name() << " [options]" << afteropts << "\n\n";
+    os << visible << "\n\n";
+    // msg is optional, nullptr means there is nothing more to print:
+    if (nullptr != msg)
+    {
+        os << msg << "\n\n";
+    }
+    exit(2);   // does not return
+}
diff --git a/src/c++/lib/common/ProgramUtil.hh b/src/c++/lib/common/ProgramUtil.hh
new file mode 100644
index 0000000..e6cf4f1
--- /dev/null
+++ b/src/c++/lib/common/ProgramUtil.hh
@@ -0,0 +1,40 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "Program.hh"
+
+#include "boost/program_options.hpp"
+
+#include <iosfwd>
+
+
+void
+usage(
+    std::ostream& os,
+    const illumina::Program& prog,
+    const boost::program_options::options_description& visible,
+    const char* desc,
+    const char* afteropts,
+    const char* msg);
diff --git a/src/c++/lib/common/ReadPairOrient.cpp b/src/c++/lib/common/ReadPairOrient.cpp
new file mode 100644
index 0000000..f88ed3b
--- /dev/null
+++ b/src/c++/lib/common/ReadPairOrient.cpp
@@ -0,0 +1,39 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/**
+ ** \brief Encapsulation of the concept of a read pair relative orientation.
+ **
+ ** Implements the stream output operator for ReadPairOrient.
+ **
+ ** \author Richard Shaw
+ **/
+
+#include "common/ReadPairOrient.hh"
+
+#include <iostream>
+
+
+/// stream the orientation as its PAIR_ORIENT label
+std::ostream&
+operator<<(std::ostream& os, const ReadPairOrient& rpo)
+{
+    return (os << PAIR_ORIENT::label(rpo.val()));
+}
diff --git a/src/c++/lib/common/ReadPairOrient.hh b/src/c++/lib/common/ReadPairOrient.hh
new file mode 100644
index 0000000..4f1e52c
--- /dev/null
+++ b/src/c++/lib/common/ReadPairOrient.hh
@@ -0,0 +1,147 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \brief Encapsulation of the concept of a read pair relative orientation.
+///
+/// \author Richard Shaw
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+
+#include "blt_util/thirdparty_push.h"
+
+#include "boost/serialization/access.hpp"
+#include "boost/serialization/level.hpp"
+#include "boost/serialization/nvp.hpp"
+
+#include "blt_util/thirdparty_pop.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+#include <iosfwd>
+#include <string>
+
+
+namespace PAIR_ORIENT
+{
+
+enum index_t { UNKNOWN, Fm, Fp, Rm, Rp, SIZE };
+
+/// printable name of orientation 'i'; UNKNOWN, SIZE and any other value
+/// without a name of its own are reported as "UNKNOWN"
+inline
+const char*
+label(const index_t i)
+{
+    // indexed by index_t, SIZE has no entry
+    static const char* const names[] =
+    {
+        "UNKNOWN", "Fm", "Fp", "Rm", "Rp"
+    };
+
+    if ((i == Fm) || (i == Fp) || (i == Rm) || (i == Rp))
+    {
+        return names[i];
+    }
+    return names[UNKNOWN];
+}
+
+/// classify a read pair from the position and strand of its two reads
+inline
+index_t
+get_index(const pos_t pos1, const bool is_fwd_strand1,
+          const pos_t pos2, const bool is_fwd_strand2)
+{
+    const bool isRead1Leftmost(pos1 < pos2);
+
+    // same-strand pairs are settled by read1 alone:
+    if (is_fwd_strand1 == is_fwd_strand2)
+    {
+        return ((isRead1Leftmost == is_fwd_strand1) ? Fp : Fm);
+    }
+
+    // opposite-strand pairs:
+    //
+    // very short fragments are special-cased as innies, a few bases of overhang
+    // are allowed to account for random matches of the reverse read to the primer
+    //
+    if (std::abs(pos1-pos2) <= 2) return Rp;
+
+    // otherwise the strand of the leftmost read decides between innie (Rp) and outtie (Rm)
+    const bool isLeftmostFwd(isRead1Leftmost ? is_fwd_strand1 : is_fwd_strand2);
+    if (isLeftmostFwd) return Rp;
+    return Rm;
+}
+
+/// inefficient (linear scan) label to id lookup, returns SIZE for unknown string:
+inline
+index_t
+get_index(const char* str)
+{
+    // advance until the label matches or every index below SIZE has been tried,
+    // in the latter case the counter itself is the SIZE sentinel
+    int labelIndex(0);
+    while ((labelIndex<SIZE) && (0!=strcmp(str,label(static_cast<index_t>(labelIndex))))) ++labelIndex;
+    return static_cast<index_t>(labelIndex);
+}
+}
+
+
+/// pair orientation status wrapper, starts out as UNKNOWN and is serialized by label:
+struct ReadPairOrient
+{
+    ReadPairOrient()
+        : _val(PAIR_ORIENT::UNKNOWN)
+    {}
+
+    PAIR_ORIENT::index_t
+    val() const
+    {
+        return _val;
+    }
+
+    void
+    setVal(const unsigned newVal)
+    {
+        assert(newVal < PAIR_ORIENT::SIZE);   // NOTE(review): this range check is compiled out when NDEBUG is defined
+        _val=static_cast<PAIR_ORIENT::index_t>(newVal);
+    }
+
+private:
+    PAIR_ORIENT::index_t _val;
+
+    friend class boost::serialization::access;
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /*version*/)
+    {
+        std::string strval(PAIR_ORIENT::label(_val));           // the orientation is archived as its text label
+        ar& boost::serialization::make_nvp("pairOrientation", strval);
+        _val = PAIR_ORIENT::get_index(strval.c_str());         // NOTE(review): a label unknown to get_index() leaves _val == SIZE
+    }
+};
+
+BOOST_CLASS_IMPLEMENTATION(ReadPairOrient, boost::serialization::object_serializable)
+
+
+std::ostream&
+operator<<(std::ostream& os, const ReadPairOrient& rpo);
diff --git a/src/c++/lib/common/config.h.in b/src/c++/lib/common/config.h.in
new file mode 100644
index 0000000..529ed12
--- /dev/null
+++ b/src/c++/lib/common/config.h.in
@@ -0,0 +1,127 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \brief Various system-specific definitions (configured by cmake)
+///
+
+#include "configBuildTimeInfo.h"
+
+/* c++/include/config.h.cmake. Manually edited */
+/* c++/include/config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Helper macros to get the version string */
+/* const std::string version(EXPAND(VERSION));  */
+
+#define STRINGIFY(x) #x
+#define EXPAND(x) STRINGIFY(x)
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#cmakedefine HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <malloc.h> header file. */
+#cmakedefine HAVE_MALLOC_H 1
+
+/* Define to 1 if you have the <mcheck.h> header file. */
+#cmakedefine HAVE_MCHECK_H 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#cmakedefine HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <signal.h> header file. */
+#cmakedefine HAVE_SIGNAL_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#cmakedefine HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#cmakedefine HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#cmakedefine HAVE_STRING_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#cmakedefine HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <time.h> header file. */
+#cmakedefine HAVE_TIME_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#cmakedefine HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `floorf' function. */
+#cmakedefine HAVE_FLOORF 1
+
+/* Define to 1 if you have the `round' function. */
+#cmakedefine HAVE_ROUND 1
+
+/* Define to 1 if you have the `roundf' function. */
+#cmakedefine HAVE_ROUNDF 1
+
+/* Define to 1 if you have the `powf' function. */
+#cmakedefine HAVE_POWF 1
+
+/* Define to 1 if you have the `zlib' library */
+#cmakedefine HAVE_ZLIB 1
+
+/* Define to 1 if you have the `bzip2' library */
+#cmakedefine HAVE_BZIP2 1
+#cmakedefine HAVE_BZLIB 1
+
+/* Define to 1 if you have the `boost_xxx_yyy' library
+   (-lboost_xxx_yyy). */
+#cmakedefine HAVE_LIBBOOST_DATE_TIME 1
+#cmakedefine HAVE_LIBBOOST_FILESYSTEM 1
+#cmakedefine HAVE_LIBBOOST_IOSTREAMS 1
+#cmakedefine HAVE_LIBBOOST_PROGRAM_OPTIONS 1
+#cmakedefine HAVE_LIBBOOST_REGEX 1
+#cmakedefine HAVE_LIBBOOST_SERIALIZATION 1
+#cmakedefine HAVE_LIBBOOST_SYSTEM 1
+
+/* Name of package */
+#cmakedefine PACKAGE @PACKAGE@
+
+/* Top level namespace */
+#cmakedefine NAMESPACE @NAMESPACE@
+
+/* Define to the address where bug reports for this package should be sent. */
+/* #undef PACKAGE_BUGREPORT isaac_bug at illumina.com */
+
+/* Define to the full name of this package. */
+#cmakedefine PACKAGE_NAME @PACKAGE_NAME@
+
+/* Define to the full name and version of this package. */
+#cmakedefine PACKAGE_STRING @PACKAGE_STRING@
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#cmakedefine PACKAGE_VERSION @PACKAGE_VERSION@
+
+/* Version number of package */
+#cmakedefine VERSION @VERSION@
+
+#define CXX_COMPILER_NAME "@CXX_COMPILER_NAME@"
+
+#define COMPILER_VERSION "@COMPILER_VERSION@"
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
diff --git a/src/c++/lib/common/configBuildTimeInfo.h.in b/src/c++/lib/common/configBuildTimeInfo.h.in
new file mode 100644
index 0000000..79b8eb4
--- /dev/null
+++ b/src/c++/lib/common/configBuildTimeInfo.h.in
@@ -0,0 +1,29 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// This consolidates compile-time config data such as git status
+/// and build date. This is in contrast to cmake configuration-time
+/// config data like relative paths and library/header availability.
+///
+
+#define BUILD_TIME "@BUILD_TIME@"
+
+#define WORKFLOW_VERSION "@WORKFLOW_VERSION@"
diff --git a/src/c++/lib/common/test/CMakeLists.txt b/src/c++/lib/common/test/CMakeLists.txt
new file mode 100644
index 0000000..83c64d7
--- /dev/null
+++ b/src/c++/lib/common/test/CMakeLists.txt
@@ -0,0 +1,28 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+include(${THIS_CXX_TEST_LIBRARY_CMAKE})
diff --git a/src/c++/lib/common/test/ReadPairOrientTest.cpp b/src/c++/lib/common/test/ReadPairOrientTest.cpp
new file mode 100644
index 0000000..ec17e31
--- /dev/null
+++ b/src/c++/lib/common/test/ReadPairOrientTest.cpp
@@ -0,0 +1,82 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "boost/test/unit_test.hpp"
+
+#include "ReadPairOrient.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_ReadPairOrient )
+
+/// Checks that PAIR_ORIENT::get_index() returns the same orientation whichever read is passed first.
+BOOST_AUTO_TEST_CASE( test_PairTypes )
+{
+    // innies: forward read A at position 10, reverse read B at position 20 -> expect Rp
+    {
+        const pos_t readAPos(10);
+        const bool readAFwd(true);
+        const pos_t readBPos(20);
+        const bool readBFwd(false);
+
+        // map A->1 B->2 (read A passed as the first read)
+        PAIR_ORIENT::index_t res = PAIR_ORIENT::get_index(readAPos, readAFwd, readBPos, readBFwd);
+        BOOST_REQUIRE_EQUAL(res, PAIR_ORIENT::Rp);
+
+        // map A->2 B->1 (argument order swapped, result must not change)
+        PAIR_ORIENT::index_t res2 = PAIR_ORIENT::get_index(readBPos, readBFwd, readAPos, readAFwd);
+        BOOST_REQUIRE_EQUAL(res2, PAIR_ORIENT::Rp);
+    }
+
+    // outties: forward read A at position 30, reverse read B at position 20 -> expect Rm
+    {
+        const pos_t readAPos(30);
+        const bool readAFwd(true);
+        const pos_t readBPos(20);
+        const bool readBFwd(false);
+
+        // map A->1 B->2 (read A passed as the first read)
+        PAIR_ORIENT::index_t res = PAIR_ORIENT::get_index(readAPos, readAFwd, readBPos, readBFwd);
+        BOOST_REQUIRE_EQUAL(res, PAIR_ORIENT::Rm);
+
+        // map A->2 B->1 (argument order swapped, result must not change)
+        PAIR_ORIENT::index_t res2 = PAIR_ORIENT::get_index(readBPos, readBFwd, readAPos, readAFwd);
+        BOOST_REQUIRE_EQUAL(res2, PAIR_ORIENT::Rm);
+    }
+
+    // short fragments (both reads at position 10) should resolve to innies (Rp):
+    {
+        const pos_t readAPos(10);
+        const bool readAFwd(true);
+        const pos_t readBPos(10);
+        const bool readBFwd(false);
+
+        // map A->1 B->2 (read A passed as the first read)
+        PAIR_ORIENT::index_t res = PAIR_ORIENT::get_index(readAPos, readAFwd, readBPos, readBFwd);
+        BOOST_REQUIRE_EQUAL(res, PAIR_ORIENT::Rp);
+
+        // map A->2 B->1 (argument order swapped, result must not change)
+        PAIR_ORIENT::index_t res2 = PAIR_ORIENT::get_index(readBPos, readBFwd, readAPos, readAFwd);
+        BOOST_REQUIRE_EQUAL(res2, PAIR_ORIENT::Rp);
+    }
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/common/test/test_main.cpp b/src/c++/lib/common/test/test_main.cpp
new file mode 100644
index 0000000..66d5348
--- /dev/null
+++ b/src/c++/lib/common/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libcommon
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/c++/lib/format/CMakeLists.txt b/src/c++/lib/format/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/format/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/format/VcfWriterCandidateSV.cpp b/src/c++/lib/format/VcfWriterCandidateSV.cpp
new file mode 100644
index 0000000..5158754
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterCandidateSV.cpp
@@ -0,0 +1,84 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "format/VcfWriterCandidateSV.hh"
+
+
+
+/// Write the ##INFO header lines for the pair-count tags added by modifyTranslocInfo() and modifyInvdelInfo().
+void
+VcfWriterCandidateSV::
+addHeaderInfo() const
+{
+    _os << "##INFO=<ID=PAIR_COUNT,Number=1,Type=Integer,Description=\"Read pairs supporting this variant where both reads are confidently mapped\">\n";
+    _os << "##INFO=<ID=BND_PAIR_COUNT,Number=1,Type=Integer,Description=\"Confidently mapped reads supporting this variant at this breakend (mapping may not be confident at remote breakend)\">\n";
+    _os << "##INFO=<ID=UPSTREAM_PAIR_COUNT,Number=1,Type=Integer,Description=\"Confidently mapped reads supporting this variant at the upstream breakend (mapping may not be confident at downstream breakend)\">\n";
+    _os << "##INFO=<ID=DOWNSTREAM_PAIR_COUNT,Number=1,Type=Integer,Description=\"Confidently mapped reads supporting this variant at this downstream breakend (mapping may not be confident at upstream breakend)\">\n"; // NOTE(review): "at this downstream" vs "at the upstream" above -- confirm the wording is intended
+}
+
+
+/// Append the BND_PAIR_COUNT and PAIR_COUNT tags of the breakend selected by \p isFirstOfPair to \p infoTags.
+void
+VcfWriterCandidateSV::
+modifyTranslocInfo(
+    const SVCandidate& sv,
+    const bool isFirstOfPair,
+    InfoTag_t& infoTags) const
+{
+    // bp1 is reported when isFirstOfPair is set, bp2 otherwise
+    const SVBreakend& localBreakend(isFirstOfPair ? sv.bp1 : sv.bp2);
+    infoTags.push_back(str(boost::format("BND_PAIR_COUNT=%i") % localBreakend.getLocalPairCount()));
+    infoTags.push_back(str(boost::format("PAIR_COUNT=%i") % localBreakend.getPairCount()));
+}
+
+
+/// Append the UPSTREAM_PAIR_COUNT, DOWNSTREAM_PAIR_COUNT and PAIR_COUNT tags of \p sv to \p infoTags.
+void
+VcfWriterCandidateSV::
+modifyInvdelInfo(
+    const SVCandidate& sv,
+    const bool isBp1First,
+    InfoTag_t& infoTags) const
+{
+    // isBp1First selects which breakend is reported as upstream; PAIR_COUNT is read from that one
+    const SVBreakend& upstream(isBp1First ? sv.bp1 : sv.bp2);
+    const SVBreakend& downstream(isBp1First ? sv.bp2 : sv.bp1);
+    infoTags.push_back(str(boost::format("UPSTREAM_PAIR_COUNT=%i") % upstream.getLocalPairCount()));
+    infoTags.push_back(str(boost::format("DOWNSTREAM_PAIR_COUNT=%i") % downstream.getLocalPairCount()));
+    infoTags.push_back(str(boost::format("PAIR_COUNT=%i") % upstream.getPairCount()));
+}
+
+
+/// Write one candidate SV by forwarding all arguments to writeSVCore() together with a default-constructed EventInfo.
+void
+VcfWriterCandidateSV::
+writeSV(
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& adata,
+    const SVCandidate& sv,
+    const SVId& svId)
+{
+    static const EventInfo event; // default-constructed once and reused by every call
+    writeSVCore( svData, adata, sv, svId, event);
+}
diff --git a/src/c++/lib/format/VcfWriterCandidateSV.hh b/src/c++/lib/format/VcfWriterCandidateSV.hh
new file mode 100644
index 0000000..2ac838f
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterCandidateSV.hh
@@ -0,0 +1,62 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "format/VcfWriterSV.hh"
+
+/// VcfWriterSV specialization for candidate SVs: adds read-pair count INFO tags to each record.
+struct VcfWriterCandidateSV : public VcfWriterSV
+{
+    VcfWriterCandidateSV(
+        const std::string& referenceFilename,
+        const bool isRNA,
+        const SVLocusSet& set,
+        std::ostream& os) :
+        VcfWriterSV(referenceFilename, isRNA, set, os)
+    {}
+    /// Write the ##INFO header lines for the pair-count tags.
+    void
+    addHeaderInfo() const override;
+    /// Append BND_PAIR_COUNT and PAIR_COUNT for the breakend selected by \p isFirstOfPair.
+    void
+    modifyTranslocInfo(
+        const SVCandidate& sv,
+        const bool isFirstOfPair,
+        InfoTag_t& infoTags) const override;
+    /// Append UPSTREAM_PAIR_COUNT, DOWNSTREAM_PAIR_COUNT and PAIR_COUNT; \p isBp1First selects the upstream breakend.
+    void
+    modifyInvdelInfo(
+        const SVCandidate& sv,
+        const bool isBp1First,
+        InfoTag_t& infoTags) const override;
+    /// Write one candidate SV via writeSVCore() using a default-constructed EventInfo.
+    void
+    writeSV(
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& adata,
+        const SVCandidate& sv,
+        const SVId& svId);
+};
+
diff --git a/src/c++/lib/format/VcfWriterDiploidSV.cpp b/src/c++/lib/format/VcfWriterDiploidSV.cpp
new file mode 100644
index 0000000..c840cfa
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterDiploidSV.cpp
@@ -0,0 +1,275 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "format/VcfWriterDiploidSV.hh"
+
+
+/// Write the diploid-specific ##INFO header lines; the REF_COUNT pair is only declared when _isRNA is set.
+void
+VcfWriterDiploidSV::
+addHeaderInfo() const
+{
+    _os << "##INFO=<ID=BND_DEPTH,Number=1,Type=Integer,Description=\"Read depth at local translocation breakend\">\n";
+    _os << "##INFO=<ID=MATE_BND_DEPTH,Number=1,Type=Integer,Description=\"Read depth at remote translocation mate breakend\">\n";
+    _os << "##INFO=<ID=JUNCTION_QUAL,Number=1,Type=Integer,Description=\"If the SV junction is part of an EVENT (ie. a multi-adjacency variant), this field provides the QUAL value for the adjacency in question only\">\n";
+    if (_isRNA) // these two tags are only emitted by modifyTranslocInfo() in RNA mode
+    {
+        _os << "##INFO=<ID=REF_COUNT,Number=1,Type=Integer,Description=\"For RNA fusions, the number of reads supporting the reference allele at this breakend\">\n";
+        _os << "##INFO=<ID=MATE_REF_COUNT,Number=1,Type=Integer,Description=\"For RNA fusions, the number of reads supporting the reference allele at the other breakend\">\n";
+    }
+}
+
+
+/// Write the ##FORMAT header lines for the per-sample tags produced by modifySample(); FS/FP only when _isRNA is set.
+void
+VcfWriterDiploidSV::
+addHeaderFormat() const
+{
+    _os << "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n";
+    _os << "##FORMAT=<ID=FT,Number=1,Type=String,Description=\"Sample filter, 'PASS' indicates that all filters have passed for this sample\">\n";
+    _os << "##FORMAT=<ID=GQ,Number=1,Type=Float,Description=\"Genotype Quality\">\n"; // NOTE(review): the VCF spec lists GQ as Integer -- confirm whether Float is intended here
+    _os << "##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification\">\n";
+    _os << "##FORMAT=<ID=PR,Number=.,Type=Integer,Description=\"Spanning paired-read support for the ref and alt alleles in the order listed\">\n";
+    _os << "##FORMAT=<ID=SR,Number=.,Type=Integer,Description=\"Split reads for the ref and alt alleles in the order listed, for reads where P(allele|read)>0.999\">\n";
+    if (_isRNA) // FS and FP are only produced by modifySample() in RNA mode
+    {
+        _os << "##FORMAT=<ID=FS,Number=2,Type=Integer,Description=\"For RNA variants split reads supporting the ref and alt alleles in the order listed\">\n";
+        _os << "##FORMAT=<ID=FP,Number=2,Type=Integer,Description=\"For RNA variants spanning paired reads supporting the ref and alt alleles in the order listed\">\n";
+    }
+}
+
+
+/// Write the ##FILTER header lines, taking every filter label and threshold from _diploidOpt.
+void
+VcfWriterDiploidSV::
+addHeaderFilters() const
+{
+    if (_isMaxDepthFilter) // the max-depth filter is only declared when it is enabled for this writer
+    {
+        _os << "##FILTER=<ID=" << _diploidOpt.maxDepthFilterLabel << ",Description=\"Depth is greater than " << _diploidOpt.maxDepthFactor << "x the median chromosome depth near one or both variant breakends\">\n";
+    }
+    _os << "##FILTER=<ID=" << _diploidOpt.maxMQ0FracLabel << ",Description=\"For a small variant (<1000 bases), the fraction of reads in all samples with MAPQ0 around either breakend exceeds " << _diploidOpt.maxMQ0Frac << "\">\n";
+    _os << "##FILTER=<ID=" << _diploidOpt.noPairSupportLabel << ",Description=\"For variants significantly larger than the paired read fragment size, no paired reads support the alternate allele in any sample.\">\n";
+    _os << "##FILTER=<ID=" << _diploidOpt.minAltFilterLabel << ",Description=\"QUAL score is less than " << _diploidOpt.minPassAltScore << "\">\n";
+    _os << "##FILTER=<ID=" << _diploidOpt.minGTFilterLabel << ",Description=\"GQ score is less than " << _diploidOpt.minPassGTScore << " (filter applied at sample level and record level if all samples are filtered)\">\n";
+    if (_isRNA) // RNA-only filter
+    {
+        _os << "##FILTER=<ID=" << _diploidOpt.rnaFilterLabel << ",Description=\"RNA fusion variants without split read and split pair support\">\n";
+    }
+}
+
+
+/// Append JUNCTION_QUAL (the single-junction altScore) to \p infotags, but only when event.isEvent() is true.
+void
+VcfWriterDiploidSV::
+modifyInfo(
+    const EventInfo& event,
+    InfoTag_t& infotags) const
+{
+    if (event.isEvent()) // no JUNCTION_QUAL tag is added otherwise
+    {
+        infotags.push_back( str(boost::format("JUNCTION_QUAL=%i") % getSingleJunctionDiploidInfo().altScore) );
+    }
+}
+
+
+/// Append BND_DEPTH/MATE_BND_DEPTH (and, when _isRNA is set, REF_COUNT/MATE_REF_COUNT) for the breakend selected by \p isFirstOfPair.
+void
+VcfWriterDiploidSV::
+modifyTranslocInfo(
+    const SVCandidate& /*sv*/,
+    const bool isFirstOfPair,
+    InfoTag_t& infotags) const
+{
+    const SVScoreInfo& baseInfo(getBaseInfo());
+
+    infotags.push_back( str(boost::format("BND_DEPTH=%i") %
+                            (isFirstOfPair ? baseInfo.bp1MaxDepth : baseInfo.bp2MaxDepth) ) ); // local breakend: bp1 when isFirstOfPair
+    infotags.push_back( str(boost::format("MATE_BND_DEPTH=%i") %
+                            (isFirstOfPair ? baseInfo.bp2MaxDepth : baseInfo.bp1MaxDepth) ) ); // mate breakend: the other one
+    if (_isRNA)
+    {
+        /// TODO: better multisample handling here; only the sample at index 0 is reported:
+        const unsigned sampleIndex(0);
+
+        const SVSampleAlleleInfo& refinfo(baseInfo.samples[sampleIndex].ref);
+        infotags.push_back(str(boost::format("REF_COUNT=%i") %
+                               (isFirstOfPair ? refinfo.confidentSplitReadAndPairCountRefBp1 : refinfo.confidentSplitReadAndPairCountRefBp2)));
+        infotags.push_back(str(boost::format("MATE_REF_COUNT=%i") %
+                               (isFirstOfPair ? refinfo.confidentSplitReadAndPairCountRefBp2 : refinfo.confidentSplitReadAndPairCountRefBp1)));
+    }
+}
+
+
+/// Write the altScore of the current diploid score info to the output stream.
+void
+VcfWriterDiploidSV::
+writeQual() const
+{
+    _os << getDiploidInfo().altScore; // getDiploidInfo() asserts that writeSV() has set the score info
+}
+
+
+/// Write the record-level filters of the current diploid score info to the output stream via writeFilters().
+void
+VcfWriterDiploidSV::
+writeFilter() const
+{
+    writeFilters(getDiploidInfo().filters,_os); // getDiploidInfo() asserts that writeSV() has set the score info
+}
+
+
+/// Map a diploid genotype index to its VCF GT string; any unrecognized index maps to the empty string.
+static
+const char*
+gtLabel(
+    const DIPLOID_GT::index_t id)
+{
+    // each known genotype index has a fixed GT string
+    // reference genotype
+    if (id == DIPLOID_GT::REF) return "0/0";
+
+    // heterozygous genotype
+    if (id == DIPLOID_GT::HET) return "0/1";
+
+    // homozygous alternate genotype
+    if (id == DIPLOID_GT::HOM) return "1/1";
+
+    // any other index has no label
+    return "";
+}
+
+
+/// Append the per-sample tags GT, FT, GQ, PL, PR (always), SR (precise SVs only) and FS, FP (precise SVs in RNA mode only) to \p sampletags.
+void
+VcfWriterDiploidSV::
+modifySample(
+    const SVCandidate& sv,
+    SampleTag_t& sampletags) const
+{
+    const SVScoreInfo& baseInfo(getBaseInfo());
+    const SVScoreInfoDiploid& diploidInfo(getDiploidInfo());
+    const unsigned diploidSampleCount(diploidInfo.samples.size());
+    // 'values' holds one string per sample and is reused for every tag below; each push_back stores a copy
+    std::vector<std::string> values(diploidSampleCount);
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVScoreInfoDiploidSample& diploidSampleInfo(diploidInfo.samples[diploidSampleIndex]);
+        values[diploidSampleIndex] = gtLabel(diploidSampleInfo.gt);
+    }
+    sampletags.push_back(std::make_pair("GT",values));
+    // FT: per-sample filters
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVScoreInfoDiploidSample& diploidSampleInfo(diploidInfo.samples[diploidSampleIndex]);
+
+        writeFilters(diploidSampleInfo.filters, values[diploidSampleIndex]); // NOTE(review): the string still holds the GT value here; presumably writeFilters() overwrites it -- confirm
+    }
+    sampletags.push_back(std::make_pair("FT",values));
+    // GQ: per-sample genotype score
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVScoreInfoDiploidSample& diploidSampleInfo(diploidInfo.samples[diploidSampleIndex]);
+
+        values[diploidSampleIndex] = str( boost::format("%s") % diploidSampleInfo.gtScore);
+    }
+    sampletags.push_back(std::make_pair("GQ",values));
+    // PL: phredLoghood values in REF,HET,HOM order
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVScoreInfoDiploidSample& diploidSampleInfo(diploidInfo.samples[diploidSampleIndex]);
+
+        values[diploidSampleIndex] =  str( boost::format("%s,%s,%s")
+                                           % diploidSampleInfo.phredLoghood[DIPLOID_GT::REF]
+                                           % diploidSampleInfo.phredLoghood[DIPLOID_GT::HET]
+                                           % diploidSampleInfo.phredLoghood[DIPLOID_GT::HOM]);
+    }
+    sampletags.push_back(std::make_pair("PL",values));
+    // PR: confident spanning pair counts as ref,alt -- assumes baseInfo.samples is indexed like diploidInfo.samples (TODO confirm)
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVSampleInfo& sampleInfo(baseInfo.samples[diploidSampleIndex]);
+        values[diploidSampleIndex] =  str( boost::format("%i,%i")
+                                           % sampleInfo.ref.confidentSpanningPairCount
+                                           % sampleInfo.alt.confidentSpanningPairCount);
+    }
+    sampletags.push_back(std::make_pair("PR",values));
+
+    if (sv.isImprecise()) return; // imprecise SVs get none of the split-read based tags below
+    // SR: confident split read counts as ref,alt
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVSampleInfo& sampleInfo(baseInfo.samples[diploidSampleIndex]);
+        values[diploidSampleIndex] =  str( boost::format("%i,%i")
+                                           % sampleInfo.ref.confidentSplitReadCount
+                                           % sampleInfo.alt.confidentSplitReadCount);
+    }
+    sampletags.push_back(std::make_pair("SR",values));
+
+    if (! _isRNA) return; // FS and FP are only written in RNA mode
+    // FS: split read counts as ref,alt (the splitReadCount members, not the confident ones used for SR)
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVSampleInfo& sampleInfo(baseInfo.samples[diploidSampleIndex]);
+        values[diploidSampleIndex] =  str( boost::format("%i,%i")
+                                           % sampleInfo.ref.splitReadCount
+                                           % sampleInfo.alt.splitReadCount);
+    }
+    sampletags.push_back(std::make_pair("FS",values));
+    // FP: spanning pair counts as ref,alt (the spanningPairCount members, not the confident ones used for PR)
+    for (unsigned diploidSampleIndex(0); diploidSampleIndex<diploidSampleCount; ++diploidSampleIndex)
+    {
+        const SVSampleInfo& sampleInfo(baseInfo.samples[diploidSampleIndex]);
+        values[diploidSampleIndex] =  str( boost::format("%i,%i")
+                                           % sampleInfo.ref.spanningPairCount
+                                           % sampleInfo.alt.spanningPairCount);
+    }
+    sampletags.push_back(std::make_pair("FP",values));
+}
+
+
+/// Write one scored SV: publish the score data through member pointers, call writeSVCore(), then reset the pointers.
+void
+VcfWriterDiploidSV::
+writeSV(
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& adata,
+    const SVCandidate& sv,
+    const SVId& svId,
+    const SVScoreInfo& baseInfo,
+    const SVScoreInfoDiploid& diploidInfo,
+    const EventInfo& event,
+    const SVScoreInfoDiploid& singleJunctionDiploidInfo)
+{
+    //TODO: this is a lame way to customize subclass behavior:
+    setScoreInfo(baseInfo);
+    _diploidInfoPtr=&diploidInfo;
+    _singleJunctionDiploidInfoPtr=&singleJunctionDiploidInfo;
+    // presumably the const overrides read the members set above while writeSVCore() runs -- confirm against VcfWriterSV
+    writeSVCore(svData, adata, sv, svId, event);
+    // NOTE(review): this reset is skipped if writeSVCore() throws, leaving pointers to the caller's arguments -- confirm whether that matters
+    clearScoreInfo();
+    _diploidInfoPtr=nullptr;
+    _singleJunctionDiploidInfoPtr=nullptr;
+}
diff --git a/src/c++/lib/format/VcfWriterDiploidSV.hh b/src/c++/lib/format/VcfWriterDiploidSV.hh
new file mode 100644
index 0000000..dda43a1
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterDiploidSV.hh
@@ -0,0 +1,113 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/JunctionIdGenerator.hh"
+#include "manta/SVModelScoreInfo.hh"
+#include "format/VcfWriterSV.hh"
+#include "format/VcfWriterScoredSV.hh"
+#include "options/CallOptionsDiploid.hh"
+
+/// VcfWriterSV specialization that writes diploid-scored SVs (QUAL, FILTER, INFO and per-sample FORMAT fields).
+struct VcfWriterDiploidSV : public VcfWriterSV, VcfWriterScoredSV
+{
+    VcfWriterDiploidSV(
+        const CallOptionsDiploid& diploidOpt,
+        const bool isMaxDepthFilter,
+        const std::string& referenceFilename,
+        const bool isRNA,
+        const SVLocusSet& set,
+        std::ostream& os) :
+        VcfWriterSV(referenceFilename,isRNA,set,os),
+        _diploidOpt(diploidOpt),
+        _isMaxDepthFilter(isMaxDepthFilter),
+        _diploidInfoPtr(nullptr),
+        _singleJunctionDiploidInfoPtr(nullptr)
+    {}
+    /// Write one scored SV; the score arguments are only referenced for the duration of the call.
+    void
+    writeSV(
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& adata,
+        const SVCandidate& sv,
+        const SVId& svId,
+        const SVScoreInfo& baseInfo,
+        const SVScoreInfoDiploid& diploidInfo,
+        const EventInfo& event,
+        const SVScoreInfoDiploid& singleJunctionDiploidInfo);
+
+private:
+    /// Write the diploid-specific ##INFO header lines.
+    void
+    addHeaderInfo() const override;
+    /// Write the ##FORMAT header lines.
+    void
+    addHeaderFormat() const override;
+    /// Write the ##FILTER header lines using the labels and thresholds in _diploidOpt.
+    void
+    addHeaderFilters() const override;
+    /// Append JUNCTION_QUAL when \p event is an event.
+    void
+    modifyInfo(
+        const EventInfo& event,
+        InfoTag_t& infotags) const override;
+    /// Append the per-sample tags (GT, FT, GQ, PL, PR and, conditionally, SR, FS, FP).
+    void
+    modifySample(
+        const SVCandidate& sv,
+        SampleTag_t& sampletags) const override;
+    /// Append the breakend depth tags (and the RNA reference counts when _isRNA is set).
+    void
+    modifyTranslocInfo(
+        const SVCandidate& sv,
+        const bool isFirstOfPair,
+        InfoTag_t& infotags) const override;
+    /// Write the diploid altScore.
+    void
+    writeQual() const override;
+    /// Write the record-level filters.
+    void
+    writeFilter() const override;
+    /// Score info of the SV being written; asserts that writeSV() has set it.
+    const SVScoreInfoDiploid&
+    getDiploidInfo() const
+    {
+        assert(nullptr != _diploidInfoPtr);
+        return *_diploidInfoPtr;
+    }
+    /// Single-junction score info of the SV being written; asserts that writeSV() has set it.
+    const SVScoreInfoDiploid&
+    getSingleJunctionDiploidInfo() const
+    {
+        assert(nullptr != _singleJunctionDiploidInfoPtr);
+        return *_singleJunctionDiploidInfoPtr;
+    }
+
+    // the two pointers below are non-null only while writeSV() is executing
+    const CallOptionsDiploid& _diploidOpt;
+    const bool _isMaxDepthFilter;
+    const SVScoreInfoDiploid* _diploidInfoPtr;
+    const SVScoreInfoDiploid* _singleJunctionDiploidInfoPtr;
+};
diff --git a/src/c++/lib/format/VcfWriterSV.cpp b/src/c++/lib/format/VcfWriterSV.cpp
new file mode 100644
index 0000000..4423ca1
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterSV.cpp
@@ -0,0 +1,922 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+/// \author Felix Schlesinger
+///
+
+#include "format/VcfWriterSV.hh"
+
+#include "blt_util/log.hh"
+#include "blt_util/seq_util.hh"
+#include "blt_util/string_util.hh"
+#include "htsapi/samtools_fasta_util.hh"
+#include "htsapi/vcf_util.hh"
+#include "common/Exceptions.hh"
+#include "manta/SVCandidateUtil.hh"
+
+#include <iostream>
+#include <sstream>
+
+
+//#define DEBUG_VCF
+
+
+#ifdef DEBUG_VCF
+#include "blt_util/log.hh"
+#endif
+
+
+/// Store the reference filename, the RNA flag, the header taken from \p set and the output stream; nothing is written here.
+VcfWriterSV::
+VcfWriterSV(
+    const std::string& referenceFilename,
+    const bool isRNA,
+    const SVLocusSet& set,
+    std::ostream& os) :
+    _referenceFilename(referenceFilename),
+    _isRNA(isRNA),
+    _header(set.header), // only the header member of the locus set is used
+    _os(os)
+{
+}
+
+
+/// Write all '##' VCF meta-information lines: file/source/reference, contigs, INFO, subclass-provided lines and ALT definitions.
+void
+VcfWriterSV::
+writeHeaderPrefix(
+    const char* progName,
+    const char* progVersion)
+{
+    _os << "##fileformat=VCFv4.1\n";
+    _os << "##fileDate=" << vcf_fileDate << "\n";
+    _os << "##source=" << progName << " " << progVersion << "\n";
+    _os << "##reference=file://" << _referenceFilename << "\n";
+
+    for (const bam_header_info::chrom_info& cdata : _header.chrom_data) // one ##contig line per chromosome in the stored header
+    {
+        _os << "##contig=<ID=" << cdata.label << ",length=" << cdata.length << ">\n";
+    }
+
+    /// vcf 4.1 reserved/suggested INFO tags:
+    _os << "##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise structural variation\">\n";
+    _os << "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n";
+    _os << "##INFO=<ID=SVLEN,Number=.,Type=Integer,Description=\"Difference in length between REF and ALT alleles\">\n";
+    _os << "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">\n";
+    _os << "##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS\">\n";
+    _os << "##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END\">\n";
+    _os << "##INFO=<ID=CIGAR,Number=A,Type=String,Description=\"CIGAR alignment for each alternate indel allele\">\n";
+    _os << "##INFO=<ID=MATEID,Number=.,Type=String,Description=\"ID of mate breakend\">\n";
+    _os << "##INFO=<ID=EVENT,Number=1,Type=String,Description=\"ID of event associated to breakend\">\n";
+    _os << "##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description=\"Length of base pair identical homology at event breakpoints\">\n";
+    _os << "##INFO=<ID=HOMSEQ,Number=.,Type=String,Description=\"Sequence of base pair identical homology at event breakpoints\">\n";
+
+    /// custom INFO tags:
+    _os << "##INFO=<ID=SVINSLEN,Number=.,Type=Integer,Description=\"Length of insertion\">\n";
+    _os << "##INFO=<ID=SVINSSEQ,Number=.,Type=String,Description=\"Sequence of insertion\">\n";
+    _os << "##INFO=<ID=LEFT_SVINSSEQ,Number=.,Type=String,Description=\"Known left side of insertion for an insertion of unknown length\">\n";
+    _os << "##INFO=<ID=RIGHT_SVINSSEQ,Number=.,Type=String,Description=\"Known right side of insertion for an insertion of unknown length\">\n";
+    _os << "##INFO=<ID=INV3,Number=0,Type=Flag,Description=\"Inversion breakends open 3' of reported location\">\n";
+    _os << "##INFO=<ID=INV5,Number=0,Type=Flag,Description=\"Inversion breakends open 5' of reported location\">\n";
+
+    if (_isRNA) // RNA-specific INFO tags are only declared in RNA mode
+    {
+        _os << "##INFO=<ID=RNA_FIRST,Number=0,Type=Flag,Description=\"For RNA fusions, this break-end is 5' in the fusion transcript\">\n";
+        _os << "##INFO=<ID=RNA_STRANDED,Number=0,Type=Flag,Description=\"For RNA fusions, the direction of transcription is known\">\n";
+        _os << "##INFO=<ID=RNA_FwRvReads,Number=2,Type=Integer,Description=\"For RNA fusions, number of stranded reads supporting forward or reverse direction of transcription\">\n";
+        _os << "##INFO=<ID=RNA_Reads,Number=1,Type=Integer,Description=\"For RNA fusions, the number of reads and pairs that potentially support this candidate before refinement and scoring\">\n";
+        _os << "##INFO=<ID=RNA_CONTIG,Number=1,Type=String,Description=\"For RNA fusions, the sequence of the breakend spanning contig\">\n";
+        _os << "##INFO=<ID=RNA_CONTIG_ALN,Number=2,Type=Integer,Description=\"For RNA fusions, length of the spanning contig alignment on each breakend\">\n";
+    }
+    addHeaderInfo(); // additional INFO lines supplied by the concrete writer
+
+    addHeaderFormat(); // FORMAT lines supplied by the concrete writer
+
+    addHeaderFilters(); // FILTER lines supplied by the concrete writer
+
+    _os << "##ALT=<ID=BND,Description=\"Translocation Breakend\">\n";
+    _os << "##ALT=<ID=INV,Description=\"Inversion\">\n";
+    _os << "##ALT=<ID=DEL,Description=\"Deletion\">\n";
+    _os << "##ALT=<ID=INS,Description=\"Insertion\">\n";
+    _os << "##ALT=<ID=DUP:TANDEM,Description=\"Tandem Duplication\">\n";
+}
+
+/// Write the eight fixed VCF column names (#CHROM through INFO), tab-separated and without a trailing newline.
+static
+void
+writeHeaderColKeyPrefix(std::ostream& os)
+{
+    os << "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
+}
+
+
+/// Write the '#CHROM ...' column header line; FORMAT and one column per sample name are added only when \p sampleNames is non-empty.
+void
+VcfWriterSV::
+writeHeaderColumnKey(
+    const std::vector<std::string>& sampleNames) const
+{
+    writeHeaderColKeyPrefix(_os);
+    if (!sampleNames.empty()) // without samples the line ends after INFO
+    {
+        _os << "\tFORMAT";
+
+        for (const std::string& sampleName : sampleNames)
+        {
+            _os << '\t' << sampleName;
+        }
+    }
+    _os << '\n';
+}
+
+
+/// Write every tag in \p info to \p os, separated by ';' (no leading or trailing separator).
+static
+void
+makeInfoField(
+    const VcfWriterSV::InfoTag_t& info,
+    std::ostream& os)
+{
+    static const char delimiter(';');
+    bool needsDelimiter(false);
+    for (const std::string& tag : info)
+    {
+        if (needsDelimiter) os << delimiter;
+        needsDelimiter = true;
+        os << tag;
+    }
+}
+
+
+/// Write the FORMAT column and then one column per sample to \p os; nothing is written when \p sample is empty.
+static
+void
+makeFormatSampleField(
+    const VcfWriterSV::SampleTag_t& sample,
+    std::ostream& os)
+{
+    static const char delimiter(':');
+
+    if (sample.empty()) return;
+
+    // FORMAT column: the key of every entry, in order
+    os << '\t';
+    {
+        bool needsDelimiter(false);
+        for (const VcfWriterSV::SampleTag_t::value_type& entry : sample)
+        {
+            if (needsDelimiter) os << delimiter;
+            needsDelimiter = true;
+
+            assert(! entry.first.empty());
+            os << entry.first;
+        }
+    }
+
+    // the number of sample columns is the length of the longest value list
+    unsigned sampleCount(0);
+    for (const VcfWriterSV::SampleTag_t::value_type& entry : sample)
+    {
+        const unsigned valueCount(entry.second.size());
+        sampleCount = std::max(sampleCount, valueCount);
+    }
+
+    // SAMPLE columns: for each sample, the value of every key in the same
+    // order as the FORMAT column
+    for (unsigned sampleIndex(0); sampleIndex < sampleCount; ++sampleIndex)
+    {
+        os << '\t';
+
+        bool needsDelimiter(false);
+        for (const VcfWriterSV::SampleTag_t::value_type& entry : sample)
+        {
+            if (needsDelimiter) os << delimiter;
+            needsDelimiter = true;
+
+            // a key with no value for this sample, or with an empty value,
+            // is written as '.'
+            const bool isMissing(
+                (entry.second.size() <= sampleIndex) ||
+                entry.second[sampleIndex].empty());
+            if (isMissing)
+            {
+                os << '.';
+            }
+            else
+            {
+                os << entry.second[sampleIndex];
+            }
+        }
+    }
+}
+
+
+
+/// \brief Append RNA-specific INFO tags for one breakend record of a spanning candidate
+///
+/// \param[in] isFirstOfPair true when the record being written is the first of the breakend pair
+/// \param[in] sv candidate providing the read counts
+/// \param[in] assemblyData assembly results; nothing is added unless isSpanning is set
+/// \param[in,out] infotags INFO entries of the record, appended to in place
+///
+/// RNA_FIRST and RNA_STRANDED may be added to either record of the pair; the
+/// read counts and contig tags are only added to the first record.
+///
+/// The ERROR/ERROR2 tags flag inconsistent assembly data. When they fire, the
+/// contig and alignment tags which would require an out-of-range read are
+/// skipped instead of indexing past the end of the containers.
+static
+void
+addRNAInfo(
+    const bool isFirstOfPair,
+    const SVCandidate& sv,
+    const SVCandidateAssemblyData& assemblyData,
+    VcfWriterSV::InfoTag_t& infotags)
+{
+    if (! assemblyData.isSpanning) return;
+
+    const bool isFirst = (assemblyData.bporient.isBp1First == isFirstOfPair);
+    if (isFirst) infotags.push_back("RNA_FIRST");
+    if (assemblyData.bporient.isStranded) infotags.push_back("RNA_STRANDED");
+
+    if (!isFirstOfPair) return; // only the first breakpoint gets the additional RNA info attached to its VCF entry
+
+    infotags.push_back(str(boost::format("RNA_FwRvReads=%i,%i") % sv.fwReads % sv.rvReads));
+    infotags.push_back(str(boost::format("RNA_Reads=%i") % sv.bp2.lowresEvidence.getTotal()));
+
+    const unsigned numContigs(assemblyData.contigs.size());
+    if (numContigs == 0) return;
+
+    // report inconsistent assembly data in the record itself:
+    if (numContigs != assemblyData.spanningAlignments.size())
+        infotags.push_back(str(boost::format("ERROR=%i,%i") % numContigs % assemblyData.spanningAlignments.size()));
+    if (numContigs <= assemblyData.bestAlignmentIndex)
+        infotags.push_back(str(boost::format("ERROR2=%i,%i") % numContigs % assemblyData.bestAlignmentIndex));
+
+    // only read the best contig/alignment when the index is valid for the
+    // respective container:
+    const auto bestIndex(assemblyData.bestAlignmentIndex);
+    if (bestIndex < assemblyData.contigs.size())
+    {
+        infotags.push_back(str(boost::format("RNA_CONTIG=%s") % assemblyData.contigs[bestIndex].seq));
+    }
+    if (bestIndex < assemblyData.spanningAlignments.size())
+    {
+        const auto& bestAlignment(assemblyData.spanningAlignments[bestIndex]);
+        infotags.push_back(str(boost::format("RNA_CONTIG_ALN=%i,%i")
+                               % apath_matched_length(bestAlignment.align1.apath)
+                               % apath_matched_length(bestAlignment.align2.apath)));
+    }
+}
+
+#ifdef DEBUG_VCF
+/// \brief Append debugging INFO tags (FOOBAR_*) for RNA candidates; DEBUG_VCF builds only
+///
+/// \param[in] isFirstOfPair true when the record being written is the first of the breakend pair
+/// \param[in] sv candidate providing breakend state and chromosome ids
+/// \param[in] assemblyData assembly results; nothing is added unless isSpanning is set
+/// \param[in,out] infotags INFO entries of the record, appended to in place
+///
+/// FOOBAR_ERROR/FOOBAR_ERROR2 flag inconsistent assembly data. When the best
+/// alignment index is not a valid contig index, the per-contig tag is skipped
+/// instead of reading past the end of the contig list.
+static
+void
+addRNADebugInfo(
+    const bool isFirstOfPair,
+    const SVCandidate& sv,
+    const SVCandidateAssemblyData& assemblyData,
+    VcfWriterSV::InfoTag_t& infotags)
+{
+    if (! assemblyData.isSpanning) return;
+
+    const bool isFirst = (assemblyData.bporient.isBp1First == isFirstOfPair);
+    const bool isRightOpen = (isFirstOfPair ? sv.bp1.state : sv.bp2.state) == SVBreakendState::RIGHT_OPEN;
+    infotags.push_back(str(boost::format("FOOBAR_FW=%1%") % (isFirst == isRightOpen)));
+
+    if (!isFirst) return; // only the first breakpoint gets the alignments attached to its VCF entry
+
+    infotags.push_back(str(boost::format("FOOBAR_bp1=%i;bp2=%i") % sv.bp1.interval.tid % sv.bp2.interval.tid));
+
+    // there can be several contigs per breakend, so we iterate over all of them.
+    const unsigned numContigs(assemblyData.contigs.size());
+    // cppcheck-suppress zerodivcond
+    infotags.push_back(str(boost::format("FOOBAR_NCONTIGS=%i") % numContigs));
+    if (numContigs == 0) return;
+
+    // report inconsistent assembly data in the record itself:
+    if (numContigs != assemblyData.spanningAlignments.size())
+        infotags.push_back(str(boost::format("FOOBAR_ERROR=%i;%i") % numContigs % assemblyData.spanningAlignments.size()));
+    if (numContigs <= assemblyData.bestAlignmentIndex)
+        infotags.push_back(str(boost::format("FOOBAR_ERROR2=%i;%i") % numContigs % assemblyData.bestAlignmentIndex));
+
+    infotags.push_back(str(boost::format("FOOBAR_BEST=%i") % assemblyData.bestAlignmentIndex));
+    //infotags.push_back(str(boost::format("FOOBAR_EXTCONTIG=%s") % assemblyData.extendedContigs[assemblyData.bestAlignmentIndex]));
+
+    // only read the best contig when the index is valid:
+    if (assemblyData.bestAlignmentIndex < assemblyData.contigs.size())
+    {
+        infotags.push_back(str(boost::format("FOOBAR_CONTIGcount=%i") % assemblyData.contigs[assemblyData.bestAlignmentIndex].supportReads.size()));
+    }
+}
+#endif
+
+#ifdef DEBUG_VCF
+/// \brief Append debugging INFO tags describing the assembly alignments; DEBUG_VCF builds only
+///
+/// \param[in] bp1 breakend whose range start offsets the "A" side alignment positions
+/// \param[in] bp2 breakend whose range start offsets the "B" side alignment positions
+/// \param[in] isFirstOfPair tags are only added for the first record of the breakend pair
+/// \param[in] assemblyData assembly results; nothing is added unless isSpanning is set
+/// \param[in,out] infotags INFO entries of the record, appended to in place
+///
+/// The tags for the best contig/alignment are only written when
+/// bestAlignmentIndex is a valid index into the respective container, so that
+/// empty or inconsistent assembly data cannot trigger an out-of-range read.
+static
+void
+addDebugInfo(
+    const SVBreakend& bp1,
+    const SVBreakend& bp2,
+    const bool isFirstOfPair,
+    const SVCandidateAssemblyData& assemblyData,
+    VcfWriterSV::InfoTag_t& infotags)
+{
+    if (! isFirstOfPair) return;
+
+    // store alignment start + cigar string for each section of the jumping alignment.
+    // there can be several contigs per breakend, so we iterate over all of them.
+    // only the first breakpoint gets the alignments attached to its VCF entry
+
+    if (! assemblyData.isSpanning) return;
+
+    const unsigned numAlign(assemblyData.spanningAlignments.size());
+    std::string cigar1;
+    std::string cigar2;
+    for (unsigned alignIndex(0); alignIndex<numAlign; ++alignIndex)
+    {
+        // bind by reference: the alignment result does not need to be copied
+        const SVCandidateAssemblyData::JumpAlignmentResultType& align(assemblyData.spanningAlignments[alignIndex]);
+        infotags.push_back( str(boost::format("CTG_JALIGN_%i_POS_A=%d") %
+                                alignIndex %
+                                (bp1.interval.range.begin_pos()+align.align1.beginPos)) );
+        infotags.push_back( str(boost::format("CTG_JALIGN_%i_POS_B=%d") %
+                                alignIndex %
+                                (bp2.interval.range.begin_pos()+align.align2.beginPos)) );
+
+        apath_to_cigar(align.align1.apath,cigar1);
+        apath_to_cigar(align.align2.apath,cigar2);
+
+        infotags.push_back( str(boost::format("CTG_JALIGN_%i_CIGAR_A=%s") % alignIndex % cigar1) );
+        infotags.push_back( str(boost::format("CTG_JALIGN_%i_CIGAR_B=%s") % alignIndex % cigar2) );
+    }
+
+    const unsigned numContigs(assemblyData.contigs.size());
+    infotags.push_back(str(boost::format("DEBUG_NCONTIGS=%i") % numContigs));
+
+    // only read the best contig/alignment when the index is valid for the
+    // respective container:
+    const auto bestIndex(assemblyData.bestAlignmentIndex);
+    if (bestIndex < assemblyData.contigs.size())
+    {
+        infotags.push_back(str(boost::format("DEBUG_BESTContig=%s") % assemblyData.contigs[bestIndex].seq));
+        infotags.push_back(str(boost::format("DEBUG_CONTIGReads=%i") % assemblyData.contigs[bestIndex].supportReads.size()));
+    }
+    if (bestIndex < assemblyData.spanningAlignments.size())
+    {
+        const SVCandidateAssemblyData::JumpAlignmentResultType& bestAlign(assemblyData.spanningAlignments[bestIndex]);
+        infotags.push_back(str(boost::format("DEBUG_CONTIGLeftAln=%i") % apath_matched_length(bestAlign.align1.apath)));
+        infotags.push_back(str(boost::format("DEBUG_CONTIGRightAln=%i") % apath_matched_length(bestAlign.align2.apath)));
+    }
+}
+#endif
+
+
+/// \brief Append the EVENT tag, carrying the event label, when event.isEvent() is true
+///
+/// \param[in,out] infoTags INFO entries of the record, appended to in place
+static
+void
+addSharedInfo(
+    const EventInfo& event,
+    VcfWriterSV::InfoTag_t& infoTags)
+{
+    // nothing is shared across records unless the candidate is part of an event:
+    if (! event.isEvent()) return;
+
+    infoTags.push_back(str(boost::format("EVENT=%i") % event.label));
+}
+
+
+
+/// \brief Write the VCF breakend (SVTYPE=BND) record for one breakend of a candidate
+///
+/// \param[in] sv candidate whose bp1/bp2 are the two breakends being joined
+/// \param[in] svId provides the ID of this record and the MATEID of its mate record
+/// \param[in] isFirstBreakend if true report bp1, else report bp2
+/// \param[in] adata assembly results; only read for RNA output and in DEBUG_VCF builds
+/// \param[in] event source of the optional EVENT tag
+///
+/// The SVCandidateSetData argument is unused. No record is written when the
+/// computed local or mate position is less than 1.
+void
+VcfWriterSV::
+writeTransloc(
+    const SVCandidate& sv,
+    const SVId& svId,
+    const bool isFirstBreakend,
+    const SVCandidateSetData& /*svData*/,
+    const SVCandidateAssemblyData& adata,
+    const EventInfo& event)
+{
+    const bool isImprecise(sv.isImprecise());
+    const bool isBreakendRangeSameShift(sv.isBreakendRangeSameShift());
+
+    // bpA is the breakend this record describes, bpB is its mate:
+    const SVBreakend& bpA( isFirstBreakend ? sv.bp1 : sv.bp2);
+    const SVBreakend& bpB( isFirstBreakend ? sv.bp2 : sv.bp1);
+
+    InfoTag_t infotags;
+    SampleTag_t sampletags;
+
+    // get CHROM
+    const std::string& chrom(_header.chrom_data[bpA.interval.tid].label);
+    const std::string& mateChrom(_header.chrom_data[bpB.interval.tid].label);
+
+    const known_pos_range2& bpArange(bpA.interval.range);
+    const known_pos_range2& bpBrange(bpB.interval.range);
+
+    // precise calls are expected to have breakend ranges of equal size:
+    if (! isImprecise)
+    {
+        assert(bpArange.size() == bpBrange.size());
+    }
+
+    // get POS
+    // imprecise calls report the center of each breakend range (+1); precise
+    // calls report the start of the local range, and either the start or the
+    // end of the mate range depending on isBreakendRangeSameShift:
+    pos_t pos(bpArange.center_pos()+1);
+    pos_t matePos(bpBrange.center_pos()+1);
+    if (! isImprecise)
+    {
+        pos = bpArange.begin_pos()+1;
+        if (isBreakendRangeSameShift)
+        {
+            matePos = bpBrange.begin_pos()+1;
+        }
+        else
+        {
+            matePos = bpBrange.end_pos();
+        }
+    }
+
+    // TODO: improve circular genome handler:
+    if ((pos<1) || (matePos<1)) return;
+
+    // get ID
+    const std::string& localId(isFirstBreakend ? svId.localId : svId.mateId);
+    const std::string& mateId(isFirstBreakend ? svId.mateId : svId.localId);
+
+    // get REF
+    // a single reference base is requested at pos-1:
+    std::string ref;
+    get_standardized_region_seq(_referenceFilename,chrom,pos-1,pos-1,ref);
+
+    assert(1 == ref.size());
+
+    // the insert sequence is reverse-complemented only for the second breakend
+    // record, and only when both breakends have the same state:
+    const bool isReverseInsertSeq(! (isFirstBreakend || (bpA.state != bpB.state)));
+    std::string tmpString;
+    const std::string* insertSeqPtr(&sv.insertSeq);
+    if (isReverseInsertSeq)
+    {
+        tmpString = reverseCompCopyStr(sv.insertSeq);
+        insertSeqPtr = &tmpString;
+    }
+    const std::string& insertSeq(*insertSeqPtr);
+
+    // build alt:
+    // the format expands to: prefix, separator, mateChrom:matePos, separator, suffix
+    boost::format altFormat("%4%%3%%1%:%2%%3%%5%");
+    {
+        // the local breakend state selects whether REF (plus any insert
+        // sequence) is placed before or after the mate locus; exactly one of
+        // prefix/suffix is non-empty:
+        std::string altPrefix;
+        std::string altSuffix;
+        if     (bpA.state == SVBreakendState::RIGHT_OPEN)
+        {
+            altPrefix = ref + insertSeq;
+        }
+        else if (bpA.state == SVBreakendState::LEFT_OPEN)
+        {
+            altSuffix = insertSeq + ref;
+        }
+        else
+        {
+            assert(false && "Unexpected bpA.state");
+        }
+
+
+        // the mate breakend state selects the bracket character; '?' remains
+        // only if the assert below is compiled out and the state is unexpected:
+        char altSep('?');
+        if     (bpB.state == SVBreakendState::RIGHT_OPEN)
+        {
+            altSep=']';
+        }
+        else if (bpB.state == SVBreakendState::LEFT_OPEN)
+        {
+            altSep='[';
+        }
+        else
+        {
+            assert(false && "Unexpected bpB.state");
+        }
+
+        altFormat % mateChrom % matePos % altSep % altPrefix % altSuffix;
+    }
+
+    // build INFO field
+    infotags.push_back("SVTYPE=BND");
+    infotags.push_back("MATEID="+mateId);
+    if (isImprecise)
+    {
+        infotags.push_back("IMPRECISE");
+    }
+
+    // CIPOS gives the bounds of the local breakend range relative to POS:
+    if (bpArange.size() > 1)
+    {
+        infotags.push_back( str( boost::format("CIPOS=%i,%i") % ((bpArange.begin_pos()+1) - pos) % (bpArange.end_pos() - pos) ));
+    }
+
+    // for precise calls, the extent of the breakend range beyond a single
+    // position is reported as HOMLEN, with HOMSEQ read from the reference:
+    if (! isImprecise)
+    {
+        if (bpArange.size() > 1)
+        {
+            infotags.push_back( str( boost::format("HOMLEN=%i") % (bpArange.size()-1) ));
+            std::string homref;
+            get_standardized_region_seq(_referenceFilename,chrom,bpArange.begin_pos()+1,bpArange.end_pos()-1,homref);
+            infotags.push_back( str( boost::format("HOMSEQ=%s") % (homref) ));
+        }
+    }
+
+    if (! insertSeq.empty())
+    {
+        infotags.push_back( str( boost::format("SVINSLEN=%i") % (insertSeq.size()) ));
+        infotags.push_back( str( boost::format("SVINSSEQ=%s") % (insertSeq) ));
+    }
+
+    addSharedInfo(event, infotags);
+
+    // virtual customization hooks:
+    modifyInfo(event, infotags);
+    modifyTranslocInfo(sv, isFirstBreakend, infotags);
+
+    modifySample(sv, sampletags);
+#ifdef DEBUG_VCF
+    addDebugInfo(bpA, bpB, isFirstBreakend, adata, infotags);
+#endif
+
+    if (_isRNA)
+    {
+        addRNAInfo(isFirstBreakend, sv, adata, infotags);
+#ifdef DEBUG_VCF
+        addRNADebugInfo(isFirstBreakend, sv, adata, infotags);
+#endif
+    }
+
+    // write out record:
+    _os << chrom
+        << '\t' << pos
+        << '\t' << localId // ID
+        << '\t' << ref // REF
+        << '\t' << str( altFormat ) // ALT
+        << '\t';
+    writeQual();
+    _os << '\t';
+    writeFilter();
+    _os << '\t';
+    makeInfoField(infotags,_os); // INFO
+    makeFormatSampleField(sampletags, _os); // FORMAT + SAMPLE
+    _os << '\n';
+}
+
+
+
+/// \brief Write both breakend records of a candidate: the bp1 record, then the bp2 record
+void
+VcfWriterSV::
+writeTranslocPair(
+    const SVCandidate& sv,
+    const SVId& svId,
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& adata,
+    const EventInfo& event)
+{
+    // the first breakend is always reported before its mate:
+    static const bool breakendOrder[] = { true, false };
+    for (const bool isFirstBreakend : breakendOrder)
+    {
+        writeTransloc(sv, svId, isFirstBreakend, svData, adata, event);
+    }
+}
+
+
+
+/// \brief Write a single VCF record for a candidate which is not reported as a breakend pair
+///
+/// \param[in] sv candidate to report
+/// \param[in] svId provides the record ID and the label used for SVTYPE and the symbolic ALT allele
+/// \param[in] isIndel if true, the variant is a simple right/left breakend insert/delete combination
+/// \param[in] event source of the optional EVENT tag
+///
+/// Precise indels with a known insertion size, whose deleted span and insert
+/// sequence are both no longer than maxNonSymbolicRecordSize, are written with
+/// full REF and ALT sequences; all other records use a symbolic ALT allele.
+/// No record is written when the computed POS is less than 1.
+///
+/// Throws illumina::common::LogicException (via BOOST_THROW_EXCEPTION) when
+/// the fetched reference sequence does not have the expected length.
+void
+VcfWriterSV::
+writeInvdel(
+    const SVCandidate& sv,
+    const SVId& svId,
+    const bool isIndel,
+    const EventInfo& event)
+{
+    const bool isImprecise(sv.isImprecise());
+    const bool isBreakendRangeSameShift(sv.isBreakendRangeSameShift());
+
+    // order the breakends so that bpA is the one with the lower range start:
+    const bool isBp1First(sv.bp1.interval.range.begin_pos()<=sv.bp2.interval.range.begin_pos());
+
+    const SVBreakend& bpA(isBp1First ? sv.bp1 : sv.bp2);
+    const SVBreakend& bpB(isBp1First ? sv.bp2 : sv.bp1);
+
+    InfoTag_t infoTags;
+    SampleTag_t sampleTags;
+
+    // get CHROM
+    // NOTE(review): only bp1's chromosome is consulted -- presumably both
+    // breakends share a chromosome for this record type; confirm with callers
+    const std::string& chrom(_header.chrom_data[sv.bp1.interval.tid].label);
+
+    const known_pos_range2& bpArange(bpA.interval.range);
+    const known_pos_range2& bpBrange(bpB.interval.range);
+
+    // precise calls are expected to have breakend ranges of equal size:
+    if (! isImprecise)
+    {
+        assert(bpArange.size() == bpBrange.size());
+    }
+
+    // above this size all records use symbolic alleles (ie. <DEL>):
+    static const unsigned maxNonSymbolicRecordSize(1000);
+
+    // if the variant is a combination of simple insertion and deletions, and below
+    // a large-event size threshold, it is classified as a small variant. In this case
+    // we report the event using full REF and ALT sequences, plus a CIGAR string for
+    // complex in/del combinations
+    //
+    bool isSmallVariant(false);
+    if ((! isImprecise) && isIndel && (! sv.isUnknownSizeInsertion))
+    {
+        // bpA starts at or before bpB (see isBp1First), so this difference is non-negative:
+        const unsigned deleteSize(bpBrange.begin_pos() - bpArange.begin_pos());
+        const unsigned insertSize(sv.insertSeq.size());
+
+        const bool isSmallDelete(deleteSize<=maxNonSymbolicRecordSize);
+        const bool isSmallInsert(insertSize<=maxNonSymbolicRecordSize);
+
+        isSmallVariant = (isSmallDelete && isSmallInsert);
+    }
+
+    // get POS and endPos,
+    // first compute internal coordinates and then transform per vcf conventions:
+    // (imprecise calls use the center of each breakend range; precise calls use
+    // the start of bpA's range, and the start or last position of bpB's range
+    // depending on isBreakendRangeSameShift)
+    pos_t internal_pos(bpArange.center_pos());
+    pos_t internal_endPos(bpBrange.center_pos());
+    if (! isImprecise)
+    {
+        internal_pos = bpArange.begin_pos();
+        if (isBreakendRangeSameShift)
+        {
+            internal_endPos = bpBrange.begin_pos();
+        }
+        else
+        {
+            internal_endPos = (bpBrange.end_pos()- 1);
+        }
+    }
+
+    // now create external pos values for vcf only
+    // everything is +1'd to get out of zero-indexed coordinates:
+    pos_t pos(internal_pos+1);
+    pos_t endPos(internal_endPos+1);
+
+    // variants are adjusted by up to one base according to breakend direction to match vcf spec:
+    const pos_t bpABkptAdjust(bpA.getLeftSideOfBkptAdjustment());
+    const pos_t bpBBkptAdjust(bpB.getLeftSideOfBkptAdjustment());
+    pos += bpABkptAdjust;
+    endPos += bpBBkptAdjust;
+
+    if (isImprecise)
+    {
+        // check against the rare IMPRECISE case arising when CIEND is a subset of CIPOS:
+        endPos=std::max(endPos,pos+1);
+    }
+
+    // positions before the start of the chromosome cannot be reported:
+    if (pos<1) return;
+
+    // get REF
+    // small variants fetch the full reference span through endPos, all other
+    // records fetch the single base at POS:
+    std::string ref;
+    {
+        const pos_t beginRefPos(pos-1);
+        pos_t endRefPos(beginRefPos);
+        if (isSmallVariant) endRefPos=endPos-1;
+
+        get_standardized_region_seq(_referenceFilename, chrom, beginRefPos, endRefPos, ref);
+
+        // the requested range is inclusive at both ends:
+        if (static_cast<unsigned>(1+endRefPos-beginRefPos) != ref.size())
+        {
+            using namespace illumina::common;
+
+            std::ostringstream oss;
+            oss << "ERROR: Unexpected reference allele size: " << ref.size() << "\n";
+            oss << "\tExpected: " << (1+endRefPos-beginRefPos) << "\n";
+            oss << "\tbeginRefPos: " << beginRefPos << " endRefPos: " << endRefPos << " isSmallVariant: " << isSmallVariant << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+    }
+
+    // build alt:
+    // small variants spell out the leading reference base followed by the
+    // insert sequence, all other records use the symbolic <label> form:
+    std::string alt;
+    if (isSmallVariant)
+    {
+        alt = ref[0] + sv.insertSeq;
+    }
+    else
+    {
+        alt = str( boost::format("<%s>") % svId.getLabel());
+    }
+
+    // build INFO field
+    // SVTYPE is the part of the svId label before the first ':'
+    // NOTE(review): words[0] assumes split_string yields at least one token -- confirm
+    std::vector<std::string> words;
+    split_string(svId.getLabel(),':',words);
+    {
+        // note that there's a reasonable argument for displaying these tags only when a
+        // symbolic allele is used (by a strict reading of the vcf spec) -- we instead
+        // print these fields for all variants for uniformity within the manta vcf:
+        //
+        infoTags.push_back( str(boost::format("END=%i") % endPos));
+        infoTags.push_back( str(boost::format("SVTYPE=%s") % words[0]));
+        const pos_t refLen(endPos-pos);
+        pos_t svLen(refLen);
+
+        // SVLEN is omitted for insertions of unknown size. For indels it is the
+        // insert length when that exceeds the reference span, otherwise the
+        // negated reference span; other types report the reference span itself:
+        if (! sv.isUnknownSizeInsertion)
+        {
+            if (isIndel)
+            {
+                const pos_t insertLen(static_cast<pos_t>(sv.insertSeq.size()));
+                if ( insertLen > refLen )
+                {
+                    svLen = insertLen;
+                }
+                else
+                {
+                    svLen = -refLen;
+                }
+            }
+            infoTags.push_back( str(boost::format("SVLEN=%i") % (svLen)));
+        }
+    }
+
+    if (isSmallVariant)
+    {
+        if (! sv.insertAlignment.empty())
+        {
+            std::string cigar;
+            apath_to_cigar(sv.insertAlignment,cigar);
+
+            // add the 1M to signify the leading reference base:
+            infoTags.push_back( str(boost::format("CIGAR=1M%s") % cigar));
+        }
+    }
+
+    if (isImprecise)
+    {
+        infoTags.push_back("IMPRECISE");
+    }
+
+    // CIPOS/CIEND give the bounds of each breakend range relative to the
+    // internal (pre-adjustment) position of that breakend:
+    if (bpArange.size() > 1)
+    {
+        infoTags.push_back( str( boost::format("CIPOS=%i,%i") % (bpArange.begin_pos() - internal_pos) % ((bpArange.end_pos()-1) - internal_pos) ));
+    }
+
+    if (! isSmallVariant)
+    {
+        if (bpBrange.size() > 1)
+        {
+            infoTags.push_back( str( boost::format("CIEND=%i,%i") % (bpBrange.begin_pos() - internal_endPos) % ((bpBrange.end_pos()-1) - internal_endPos) ));
+        }
+    }
+
+    // for precise calls, the extent of the breakend range beyond a single
+    // position is reported as HOMLEN, with HOMSEQ read from the reference:
+    if (! isImprecise)
+    {
+        if (bpArange.size() > 1)
+        {
+            infoTags.push_back( str( boost::format("HOMLEN=%i") % (bpArange.size()-1) ));
+            std::string homref;
+            get_standardized_region_seq(_referenceFilename,chrom,bpArange.begin_pos()+1,bpArange.end_pos()-1,homref);
+            infoTags.push_back( str( boost::format("HOMSEQ=%s") % (homref) ));
+        }
+    }
+
+    // small variants already carry the insert sequence in ALT, so SVINSLEN/SVINSSEQ
+    // are only added to the remaining records:
+    if (! isSmallVariant)
+    {
+        if (! (sv.insertSeq.empty() || sv.isUnknownSizeInsertion))
+        {
+            infoTags.push_back( str( boost::format("SVINSLEN=%i") % (sv.insertSeq.size()) ));
+            // the insert sequence is reverse-complemented only when bp2 is the
+            // leftmost breakend and both breakends have the same state:
+            if (isBp1First || (bpA.state != bpB.state))
+            {
+                infoTags.push_back( str( boost::format("SVINSSEQ=%s") % (sv.insertSeq) ));
+            }
+            else
+            {
+                infoTags.push_back( str( boost::format("SVINSSEQ=%s") % reverseCompCopyStr(sv.insertSeq) ));
+            }
+        }
+    }
+
+    // insertions of unknown size report whichever flanking insert sequences are available:
+    if (sv.isUnknownSizeInsertion)
+    {
+        if (! sv.unknownSizeInsertionLeftSeq.empty())
+        {
+            infoTags.push_back( str( boost::format("LEFT_SVINSSEQ=%s") % (sv.unknownSizeInsertionLeftSeq) ));
+        }
+
+        if (! sv.unknownSizeInsertionRightSeq.empty())
+        {
+            infoTags.push_back( str( boost::format("RIGHT_SVINSSEQ=%s") % (sv.unknownSizeInsertionRightSeq) ));
+        }
+    }
+
+    // inversions are tagged INV3 or INV5 according to the state of bp1:
+    if (svId.svType == EXTENDED_SV_TYPE::INVERSION)
+    {
+        if (sv.bp1.state == SVBreakendState::RIGHT_OPEN)
+        {
+            infoTags.push_back("INV3");
+        }
+        else if (sv.bp1.state == SVBreakendState::LEFT_OPEN)
+        {
+            infoTags.push_back("INV5");
+        }
+        else
+        {
+            assert(false && "Unexpected inversion configuration");
+        }
+    }
+
+    addSharedInfo(event, infoTags);
+
+    // virtual customization hooks:
+    modifyInfo(event, infoTags);
+    modifyInvdelInfo(sv, isBp1First, infoTags);
+
+    modifySample(sv, sampleTags);
+
+    // write out record:
+    _os << chrom
+        << '\t' << pos
+        << '\t' << svId.localId // ID
+        << '\t' << ref // REF
+        << '\t' << alt // ALT
+        << '\t';
+    writeQual();
+    _os << '\t';
+    writeFilter();
+    _os << '\t';
+    makeInfoField(infoTags,_os); // INFO
+    makeFormatSampleField(sampleTags, _os); // FORMAT + SAMPLE
+    _os << '\n';
+}
+
+
+
+/// \brief Test whether \p svType is one of the extended SV types this writer can report
+///
+/// \return true for the translocation, inversion, insert, delete and tandem
+///         duplication types, false for every other value
+static
+bool
+isAcceptedSVType(
+    const EXTENDED_SV_TYPE::index_t svType)
+{
+    using namespace EXTENDED_SV_TYPE;
+
+    return ((svType == INTERTRANSLOC) ||
+            (svType == INTRATRANSLOC) ||
+            (svType == INVERSION) ||
+            (svType == INSERT) ||
+            (svType == DELETE) ||
+            (svType == TANDUP));
+}
+
+
+
+/// \brief Classify \p sv and write it as either a breakend record pair or a single record
+///
+/// Throws illumina::common::LogicException (via BOOST_THROW_EXCEPTION) when
+/// the candidate's extended SV type is not accepted. Any exception raised
+/// while writing is logged together with the candidate and then rethrown.
+void
+VcfWriterSV::
+writeSVCore(
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& adata,
+    const SVCandidate& sv,
+    const SVId& svId,
+    const EventInfo& event)
+{
+    using namespace EXTENDED_SV_TYPE;
+    const index_t svType(getExtendedSVType(sv, _isRNA));
+
+#ifdef DEBUG_VCF
+    log_os << "VcfWriterSV::writeSVCore svType: " << EXTENDED_SV_TYPE::label(svType) << "\n";
+#endif
+
+    const bool isAccepted(isAcceptedSVType(svType));
+    if (! isAccepted)
+    {
+        using namespace illumina::common;
+
+        std::ostringstream errorMsg;
+        errorMsg << "ERROR: sv candidate cannot be classified: " << sv << "\n";
+        BOOST_THROW_EXCEPTION(LogicException(errorMsg.str()));
+    }
+
+    try
+    {
+        if (! isSVTransloc(svType))
+        {
+            // everything which is not a translocation goes into a single record:
+            writeInvdel(sv, svId, isSVIndel(svType), event);
+        }
+        else
+        {
+            writeTranslocPair(sv, svId, svData, adata, event);
+        }
+    }
+    catch (...)
+    {
+        // add candidate context to the log, then let the exception propagate:
+        log_os << "Exception caught while attempting to write sv candidate to vcf: " << sv << "\n";
+        log_os << "\tsvId: " << svId.getLabel() << " ext-svType: " << EXTENDED_SV_TYPE::label(svType) << "\n";
+        throw;
+    }
+}
+
+
+
+/// \brief Write a VCF FILTER value to \p os
+///
+/// Writes "PASS" for an empty filter set, otherwise the filter labels in set
+/// order joined by ';'.
+void
+VcfWriterSV::
+writeFilters(
+    const std::set<std::string>& filters,
+    std::ostream& os)
+{
+    if (filters.empty())
+    {
+        os << "PASS";
+        return;
+    }
+
+    const std::set<std::string>::const_iterator firstFilter(filters.begin());
+    for (auto filterIter(firstFilter); filterIter != filters.end(); ++filterIter)
+    {
+        // the delimiter goes between labels, never before the first one:
+        if (filterIter != firstFilter) os << ';';
+        os << *filterIter;
+    }
+}
+
+
+
+/// \brief String-returning variant of writeFilters
+///
+/// \param[out] s overwritten with the text the stream overload writes for \p filters
+void
+VcfWriterSV::
+writeFilters(
+    const std::set<std::string>& filters,
+    std::string& s)
+{
+    // format into a temporary stream, then hand the result to the caller:
+    std::ostringstream filterStream;
+    writeFilters(filters, filterStream);
+    s = filterStream.str();
+}
diff --git a/src/c++/lib/format/VcfWriterSV.hh b/src/c++/lib/format/VcfWriterSV.hh
new file mode 100644
index 0000000..389c81f
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterSV.hh
@@ -0,0 +1,187 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/EventInfo.hh"
+#include "manta/JunctionIdGenerator.hh"
+#include "manta/SVCandidate.hh"
+#include "manta/SVCandidateAssemblyData.hh"
+#include "manta/SVCandidateSetData.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include <iosfwd>
+
+
+/// \brief Base writer for structural variant VCF output
+///
+/// Provides the shared header and record writing logic. The virtual members
+/// are customization hooks: their defaults add nothing to the header or to a
+/// record's INFO/sample fields, and write '.' for QUAL and FILTER.
+struct VcfWriterSV
+{
+    /// \param[in] os stream which all VCF output is written to
+    ///
+    /// NOTE(review): the members below hold references, so the reference
+    /// filename string and \p os presumably need to outlive this object --
+    /// the constructor body is not visible here, confirm
+    VcfWriterSV(
+        const std::string& referenceFilename,
+        const bool isRNA,
+        const SVLocusSet& set,
+        std::ostream& os);
+
+    virtual
+    ~VcfWriterSV() {}
+
+    /// write the complete VCF header: the header prefix followed by the
+    /// column-key line
+    ///
+    /// \param[in] sampleNames labels of the sample columns (may be empty)
+    void
+    writeHeader(
+        const char* progName,
+        const char* progVersion,
+        const std::vector<std::string>& sampleNames)
+    {
+        writeHeaderPrefix(progName, progVersion);
+        writeHeaderColumnKey(sampleNames);
+    }
+
+    /// entries of a record's INFO field, in output order
+    typedef std::vector<std::string> InfoTag_t;
+
+    /// ordered list of (FORMAT key, per-sample values) pairs
+    typedef std::vector<std::pair<std::string,std::vector<std::string> > > SampleTag_t;
+
+protected:
+    void
+    writeHeaderPrefix(
+        const char* progName,
+        const char* progVersion);
+
+    /// write the "#CHROM ..." column-key line; FORMAT and the sample columns
+    /// are only written when \p sampleNames is not empty
+    void
+    writeHeaderColumnKey(
+        const std::vector<std::string>& sampleNames) const;
+
+    /// header customization hook, no-op by default
+    virtual
+    void
+    addHeaderInfo() const {}
+
+    /// header customization hook, no-op by default
+    virtual
+    void
+    addHeaderFormat() const {}
+
+    /// header customization hook, no-op by default
+    virtual
+    void
+    addHeaderFilters() const {}
+
+    /// classify \p sv and write its VCF record(s): a pair of breakend records
+    /// for translocation types, a single record otherwise
+    void
+    writeSVCore(
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& adata,
+        const SVCandidate& sv,
+        const SVId& svId,
+        const EventInfo& event);
+
+    /// add info tags which can be customized by sub-class
+    virtual
+    void
+    modifyInfo(
+        const EventInfo& /*event*/,
+        InfoTag_t& /*infotags*/) const
+    {}
+
+    /// add info tags specific to translocations:
+    virtual
+    void
+    modifyTranslocInfo(
+        const SVCandidate& /*sv*/,
+        const bool /*isFirstOfPair*/,
+        InfoTag_t& /*infoTags*/) const
+    {}
+
+    /// add info tags specific to non-translocations:
+    virtual
+    void
+    modifyInvdelInfo(
+        const SVCandidate& /*sv*/,
+        const bool /*isBp1First*/,
+        InfoTag_t& /*infoTags*/) const
+    {}
+
+    /// write the QUAL column value, '.' by default
+    virtual
+    void
+    writeQual() const
+    {
+        _os << '.';
+    }
+
+    /// write the FILTER column value, '.' by default
+    virtual
+    void
+    writeFilter() const
+    {
+        _os << '.';
+    }
+
+    /// add FORMAT/sample values which can be customized by sub-class, no-op by default
+    virtual
+    void
+    modifySample(
+        const SVCandidate& /*sv*/,
+        SampleTag_t& /*sampletags*/) const
+    {}
+
+    /// write a FILTER value to \p os: "PASS" for an empty set, otherwise the
+    /// labels joined by ';'
+    static
+    void
+    writeFilters(
+        const std::set<std::string>& filters,
+        std::ostream& os);
+
+    /// same as the stream overload, but the result overwrites \p s
+    static
+    void
+    writeFilters(
+        const std::set<std::string>& filters,
+        std::string& s);
+
+private:
+
+    /// write the breakend record for one breakend of \p sv
+    ///
+    /// \param[in] isFirstBreakend if true report bp1, else report bp2
+    void
+    writeTransloc(
+        const SVCandidate& sv,
+        const SVId& svId,
+        const bool isFirstBreakend,
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& adata,
+        const EventInfo& event);
+
+    /// write both breakend records of \p sv, bp1 first
+    void
+    writeTranslocPair(
+        const SVCandidate& sv,
+        const SVId& svId,
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& adata,
+        const EventInfo& event);
+
+    /// write a single record for a candidate which is not reported as a breakend pair
+    ///
+    /// \param isIndel if true, the variant is a simple right/left breakend insert/delete combination
+    void
+    writeInvdel(
+        const SVCandidate& sv,
+        const SVId& svId,
+        const bool isIndel,
+        const EventInfo& event);
+
+protected:
+    /// reference sequence file used to look up REF and HOMSEQ bases
+    const std::string& _referenceFilename;
+
+    /// if true, RNA-specific SV typing and INFO tags are used
+    const bool _isRNA;
+
+private:
+    /// maps chromosome index (tid) to chromosome label
+    const bam_header_info& _header;
+protected:
+    /// destination of all VCF output
+    std::ostream& _os;
+};
+
diff --git a/src/c++/lib/format/VcfWriterScoredSV.hh b/src/c++/lib/format/VcfWriterScoredSV.hh
new file mode 100644
index 0000000..e29cc2f
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterScoredSV.hh
@@ -0,0 +1,58 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVModelScoreInfo.hh"
+
+#include <cassert>
+
+
+/// \brief Holds a non-owning pointer to the score info of the SV currently being written
+///
+/// All operations are protected, so the type is only usable through derived
+/// classes.
+struct VcfWriterScoredSV
+{
+private:
+    /// score info registered by setScoreInfo(), or nullptr when none is set
+    const SVScoreInfo* _baseInfoPtr = nullptr;
+
+protected:
+
+    /// register \p baseInfo; only its address is stored, so the object must
+    /// stay valid for as long as getBaseInfo() may be called
+    void
+    setScoreInfo(
+        const SVScoreInfo& baseInfo)
+    {
+        _baseInfoPtr = &baseInfo;
+    }
+
+    /// forget the registered score info
+    void
+    clearScoreInfo()
+    {
+        _baseInfoPtr = nullptr;
+    }
+
+    /// access the registered score info; asserts that one has been set
+    const SVScoreInfo&
+    getBaseInfo() const
+    {
+        assert(_baseInfoPtr != nullptr);
+        const SVScoreInfo& baseInfo(*_baseInfoPtr);
+        return baseInfo;
+    }
+};
diff --git a/src/c++/lib/format/VcfWriterSomaticSV.cpp b/src/c++/lib/format/VcfWriterSomaticSV.cpp
new file mode 100644
index 0000000..9454756
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterSomaticSV.cpp
@@ -0,0 +1,163 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "format/VcfWriterSomaticSV.hh"
+
+#include "boost/algorithm/string/join.hpp"
+
+
+
+void
+VcfWriterSomaticSV::
+addHeaderInfo() const
+{   // writes the somatic-specific ##INFO definitions to _os, one header line each
+    _os << "##INFO=<ID=BND_DEPTH,Number=1,Type=Integer,Description=\"Read depth at local translocation breakend\">\n";
+    _os << "##INFO=<ID=MATE_BND_DEPTH,Number=1,Type=Integer,Description=\"Read depth at remote translocation mate breakend\">\n";
+    _os << "##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=\"Somatic mutation\">\n";
+    _os << "##INFO=<ID=SOMATICSCORE,Number=1,Type=Integer,Description=\"Somatic variant quality score\">\n";
+    _os << "##INFO=<ID=JUNCTION_SOMATICSCORE,Number=1,Type=Integer,Description=\"If the SV junction is part of an EVENT (ie. a multi-adjacency variant), this field provides the SOMATICSCORE value for the adjacency in question only\">\n";
+}
+
+
+
+void
+VcfWriterSomaticSV::
+addHeaderFormat() const
+{   // both ##FORMAT definitions (PR and SR) go to _os in a single chained insertion
+    _os << "##FORMAT=<ID=PR,Number=.,Type=Integer,Description=\"Spanning paired-read support for the ref and alt alleles in the order listed\">\n"
+        << "##FORMAT=<ID=SR,Number=.,Type=Integer,Description=\"Split reads for the ref and alt alleles in the order listed, for reads where P(allele|read)>0.999\">\n";
+}
+
+
+
+void
+VcfWriterSomaticSV::
+addHeaderFilters() const
+{
+    if (_isMaxDepthFilter)  // the max-depth filter is only declared when it is enabled
+    {
+        _os << "##FILTER=<ID=" << _somaticOpt.maxDepthFilterLabel << ",Description=\"Normal sample site depth is greater than " << _somaticOpt.maxDepthFactor << "x the median chromosome depth near one or both variant breakends\">\n";
+    }
+    _os << "##FILTER=<ID=" << _somaticOpt.minSomaticScoreLabel << ",Description=\"Somatic score is less than " << _somaticOpt.minPassSomaticScore << "\">\n"
+        << "##FILTER=<ID=" << _somaticOpt.maxMQ0FracLabel << ",Description=\"For a small variant (<1000 bases) in the normal sample, the fraction of reads with MAPQ0 around either breakend exceeds " << _somaticOpt.maxMQ0Frac << "\">\n";
+}
+
+
+
+void
+VcfWriterSomaticSV::
+modifyInfo(
+    const EventInfo& event,
+    std::vector<std::string>& infotags) const
+{
+    infotags.emplace_back("SOMATIC");
+    infotags.push_back( str(boost::format("SOMATICSCORE=%i") % getSomaticInfo().somaticScore) );
+
+    // the per-junction score is only appended when event.isEvent() holds
+    if (! event.isEvent()) return;
+
+    infotags.push_back( str(boost::format("JUNCTION_SOMATICSCORE=%i") % getSingleJunctionSomaticInfo().somaticScore) );
+}
+
+
+
+void
+VcfWriterSomaticSV::
+modifyTranslocInfo(
+    const SVCandidate& /*sv*/,
+    const bool isFirstOfPair,
+    std::vector<std::string>& infotags) const
+{
+    const SVScoreInfo& scoreInfo(getBaseInfo());
+    // for the first record of the pair bp1 is the local breakend and bp2 the mate, otherwise the roles swap
+    const auto localDepth(isFirstOfPair ? scoreInfo.bp1MaxDepth : scoreInfo.bp2MaxDepth);
+    const auto mateDepth(isFirstOfPair ? scoreInfo.bp2MaxDepth : scoreInfo.bp1MaxDepth);
+    infotags.push_back( str(boost::format("BND_DEPTH=%i") % localDepth) );
+    infotags.push_back( str(boost::format("MATE_BND_DEPTH=%i") % mateDepth) );
+}
+
+
+
+void
+VcfWriterSomaticSV::
+modifySample(
+    const SVCandidate& sv,
+    SampleTag_t& sampletags) const
+{
+    const SVScoreInfo& scoreInfo(getBaseInfo());
+
+    // PR: "ref,alt" confident spanning pair counts, one entry per sample
+    std::vector<std::string> pairValues;
+    pairValues.reserve(scoreInfo.samples.size());
+    for (const SVSampleInfo& sinfo : scoreInfo.samples)
+    {
+        pairValues.push_back( str( boost::format("%i,%i") % sinfo.ref.confidentSpanningPairCount % sinfo.alt.confidentSpanningPairCount) );
+    }
+    sampletags.push_back(std::make_pair("PR",pairValues));
+
+    // SR: "ref,alt" confident split read counts; not written for imprecise SVs
+    if (sv.isImprecise()) return;
+
+    std::vector<std::string> splitValues;
+    for (const SVSampleInfo& sinfo : scoreInfo.samples)
+    {
+        splitValues.push_back( str( boost::format("%i,%i") % sinfo.ref.confidentSplitReadCount % sinfo.alt.confidentSplitReadCount) );
+    }
+    sampletags.push_back(std::make_pair("SR",splitValues));
+}
+
+
+
+void
+VcfWriterSomaticSV::
+writeFilter() const
+{
+    writeFilters(getSomaticInfo().filters, _os); // passes the current somatic score's filter set and the output stream to writeFilters
+}
+
+
+
+void
+VcfWriterSomaticSV::
+writeSV(
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& adata,
+    const SVCandidate& sv,
+    const SVId& svId,
+    const SVScoreInfo& baseInfo,
+    const SVScoreInfoSomatic& somaticInfo,
+    const EventInfo& event,
+    const SVScoreInfoSomatic& singleJunctionSomaticInfo)
+{
+    //TODO: this is a lame way to customize subclass behavior:
+    setScoreInfo(baseInfo);
+    _somaticInfoPtr=&somaticInfo;
+    _singleJunctionSomaticInfoPtr=&singleJunctionSomaticInfo;
+    // the score pointers are set only for the duration of this writeSVCore call
+    writeSVCore(svData, adata, sv, svId, event);
+    // reset them (nullptr, matching the constructor) so the asserting getters catch any later access
+    clearScoreInfo();
+    _somaticInfoPtr=nullptr;
+    _singleJunctionSomaticInfoPtr=nullptr;
+}
diff --git a/src/c++/lib/format/VcfWriterSomaticSV.hh b/src/c++/lib/format/VcfWriterSomaticSV.hh
new file mode 100644
index 0000000..54e08be
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterSomaticSV.hh
@@ -0,0 +1,111 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVModelScoreInfo.hh"
+#include "format/VcfWriterSV.hh"
+#include "format/VcfWriterScoredSV.hh"
+#include "options/CallOptionsSomatic.hh"
+
+
+struct VcfWriterSomaticSV : public VcfWriterSV, VcfWriterScoredSV
+{
+    static const bool isRNA = false;
+    // somaticOpt is held by reference (not copied); both score pointers start out unset
+    VcfWriterSomaticSV(
+        const CallOptionsSomatic& somaticOpt,
+        const bool isMaxDepthFilter,
+        const std::string& referenceFilename,
+        const SVLocusSet& set,
+        std::ostream& os) :
+        VcfWriterSV(referenceFilename, isRNA, set,os),
+        _somaticOpt(somaticOpt),
+        _isMaxDepthFilter(isMaxDepthFilter),
+        _somaticInfoPtr(nullptr),
+        _singleJunctionSomaticInfoPtr(nullptr)
+    {}
+    /// write one SV record; the score arguments are only referenced for the duration of the call
+    void
+    writeSV(
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& adata,
+        const SVCandidate& sv,
+        const SVId& svId,
+        const SVScoreInfo& baseInfo,
+        const SVScoreInfoSomatic& somaticInfo,
+        const EventInfo& event,
+        const SVScoreInfoSomatic& singleJunctionSomaticInfo);
+
+private:
+    // overrides of the virtual customization points declared by the VcfWriterSV base:
+    void
+    addHeaderInfo() const override;
+
+    void
+    addHeaderFormat() const override;
+
+    void
+    addHeaderFilters() const override;
+
+    void
+    modifyInfo(
+        const EventInfo& event,
+        std::vector<std::string>& infotags) const override;
+
+    void
+    modifyTranslocInfo(
+        const SVCandidate& sv,
+        const bool isFirstOfPair,
+        std::vector<std::string>& infotags) const override;
+
+    void
+    modifySample(
+        const SVCandidate& sv,
+        SampleTag_t& sampletags) const override;
+
+    void
+    writeFilter() const override;
+    /// somatic score info of the record being written; asserts when called outside of writeSV
+    const SVScoreInfoSomatic&
+    getSomaticInfo() const
+    {
+        assert(nullptr != _somaticInfoPtr);
+        return *_somaticInfoPtr;
+    }
+    /// single-junction somatic score info of the record being written; asserts outside of writeSV
+    const SVScoreInfoSomatic&
+    getSingleJunctionSomaticInfo() const
+    {
+        assert(nullptr != _singleJunctionSomaticInfoPtr);
+        return *_singleJunctionSomaticInfoPtr;
+    }
+
+
+    const CallOptionsSomatic& _somaticOpt;
+    const bool _isMaxDepthFilter;
+    const SVScoreInfoSomatic* _somaticInfoPtr;
+    const SVScoreInfoSomatic* _singleJunctionSomaticInfoPtr;
+};
+
diff --git a/src/c++/lib/format/VcfWriterTumorSV.cpp b/src/c++/lib/format/VcfWriterTumorSV.cpp
new file mode 100644
index 0000000..f3a8c6b
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterTumorSV.cpp
@@ -0,0 +1,136 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#include "format/VcfWriterTumorSV.hh"
+
+
+
+void
+VcfWriterTumorSV::
+addHeaderInfo() const
+{   // the two breakend-depth ##INFO definitions go to _os in a single chained insertion
+    _os << "##INFO=<ID=BND_DEPTH,Number=1,Type=Integer,Description=\"Read depth at local translocation breakend\">\n"
+        << "##INFO=<ID=MATE_BND_DEPTH,Number=1,Type=Integer,Description=\"Read depth at remote translocation mate breakend\">\n";
+}
+
+
+
+void
+VcfWriterTumorSV::
+addHeaderFormat() const
+{   // both ##FORMAT definitions (PR and SR) go to _os in a single chained insertion
+    _os << "##FORMAT=<ID=PR,Number=.,Type=Integer,Description=\"Spanning paired-read support for the ref and alt alleles in the order listed\">\n"
+        << "##FORMAT=<ID=SR,Number=.,Type=Integer,Description=\"Split reads for the ref and alt alleles in the order listed, for reads where P(allele|read)>0.999\">\n";
+}
+
+
+
+void
+VcfWriterTumorSV::
+addHeaderFilters() const
+{
+    const CallOptionsTumor& opt(_tumorOpt);
+    if (_isMaxDepthFilter)  // the max-depth filter is only declared when it is enabled
+    {
+        _os << "##FILTER=<ID=" << opt.maxDepthFilterLabel << ",Description=\"Sample site depth is greater than " << opt.maxDepthFactor << "x the median chromosome depth near one or both variant breakends\">\n";
+    }
+    _os << "##FILTER=<ID=" << opt.maxMQ0FracLabel << ",Description=\"For a small variant (<1000 base), the fraction of reads with MAPQ0 around either breakend exceeds " << opt.maxMQ0Frac << "\">\n";
+}
+
+
+
+void
+VcfWriterTumorSV::
+writeFilter() const
+{
+    writeFilters(getTumorInfo().filters, _os); // passes the current tumor score's filter set and the output stream to writeFilters
+}
+
+
+
+void
+VcfWriterTumorSV::
+modifySample(
+    const SVCandidate& sv,
+    SampleTag_t& sampletags) const
+{
+    const SVScoreInfo& scoreInfo(getBaseInfo());
+
+    // PR: "ref,alt" confident spanning pair counts, one entry per sample
+    std::vector<std::string> pairValues;
+    pairValues.reserve(scoreInfo.samples.size());
+    for (const SVSampleInfo& sinfo : scoreInfo.samples)
+    {
+        pairValues.push_back( str( boost::format("%i,%i") % sinfo.ref.confidentSpanningPairCount % sinfo.alt.confidentSpanningPairCount) );
+    }
+    sampletags.push_back(std::make_pair("PR",pairValues));
+
+    // SR: "ref,alt" confident split read counts; not written for imprecise SVs
+    if (sv.isImprecise()) return;
+
+    std::vector<std::string> splitValues;
+    for (const SVSampleInfo& sinfo : scoreInfo.samples)
+    {
+        splitValues.push_back( str( boost::format("%i,%i") % sinfo.ref.confidentSplitReadCount % sinfo.alt.confidentSplitReadCount) );
+    }
+    sampletags.push_back(std::make_pair("SR",splitValues));
+}
+
+void
+VcfWriterTumorSV::
+modifyTranslocInfo(
+    const SVCandidate& /*sv*/,
+    const bool isFirstOfPair,
+    InfoTag_t& infotags) const
+{
+    const SVScoreInfo& scoreInfo(getBaseInfo());
+    // for the first record of the pair bp1 is the local breakend and bp2 the mate, otherwise the roles swap
+    const auto localDepth(isFirstOfPair ? scoreInfo.bp1MaxDepth : scoreInfo.bp2MaxDepth);
+    const auto mateDepth(isFirstOfPair ? scoreInfo.bp2MaxDepth : scoreInfo.bp1MaxDepth);
+    infotags.push_back( str(boost::format("BND_DEPTH=%i") % localDepth) );
+    infotags.push_back( str(boost::format("MATE_BND_DEPTH=%i") % mateDepth) );
+}
+
+
+
+void
+VcfWriterTumorSV::
+writeSV(
+    const SVCandidateSetData& svData,
+    const SVCandidateAssemblyData& adata,
+    const SVCandidate& sv,
+    const SVId& svId,
+    const SVScoreInfo& baseInfo,
+    const SVScoreInfoTumor& tumorInfo,
+    const EventInfo& event)
+{
+    //TODO: this is a lame way to customize subclass behavior:
+    // register the score records for the duration of the writeSVCore call, then detach them again
+    setScoreInfo(baseInfo);
+    _tumorInfoPtr = &tumorInfo;
+    writeSVCore(svData, adata, sv, svId, event);
+
+    _tumorInfoPtr = nullptr;
+    clearScoreInfo();
+}
diff --git a/src/c++/lib/format/VcfWriterTumorSV.hh b/src/c++/lib/format/VcfWriterTumorSV.hh
new file mode 100644
index 0000000..babfac2
--- /dev/null
+++ b/src/c++/lib/format/VcfWriterTumorSV.hh
@@ -0,0 +1,96 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#pragma once
+
+#include "format/VcfWriterSV.hh"
+#include "format/VcfWriterScoredSV.hh"
+#include "options/CallOptionsTumor.hh"
+
+
+struct VcfWriterTumorSV : public VcfWriterSV, VcfWriterScoredSV
+{
+    static const bool isRNA = false;
+    // tumorOpt is held by reference (not copied); the tumor score pointer starts out unset
+    VcfWriterTumorSV(
+        const CallOptionsTumor& tumorOpt,
+        const bool isMaxDepthFilter,
+        const std::string& referenceFilename,
+        const SVLocusSet& set,
+        std::ostream& os) :
+        VcfWriterSV(referenceFilename, isRNA, set, os),
+        _tumorOpt(tumorOpt),
+        _isMaxDepthFilter(isMaxDepthFilter),
+        _tumorInfoPtr(nullptr)
+    {}
+    /// write one SV record; the score arguments are only referenced for the duration of the call
+    void
+    writeSV(
+        const SVCandidateSetData& svData,
+        const SVCandidateAssemblyData& adata,
+        const SVCandidate& sv,
+        const SVId& svId,
+        const SVScoreInfo& baseInfo,
+        const SVScoreInfoTumor& tumorInfo,
+        const EventInfo& event);
+
+private:
+    // overrides of the virtual customization points declared by the VcfWriterSV base:
+    void
+    addHeaderInfo() const override;
+
+    void
+    addHeaderFormat() const override;
+
+    void
+    addHeaderFilters() const override;
+
+    void
+    writeFilter() const override;
+
+    void
+    modifySample(
+        const SVCandidate& sv,
+        SampleTag_t& sampletags) const override;
+
+    void
+    modifyTranslocInfo(
+        const SVCandidate& sv,
+        const bool isFirstOfPair,
+        InfoTag_t& infotags) const override;
+
+
+    /// tumor score info of the record being written; asserts when called outside of writeSV
+    const SVScoreInfoTumor&
+    getTumorInfo() const
+    {
+        assert(nullptr != _tumorInfoPtr);
+        return *_tumorInfoPtr;
+    }
+
+    const CallOptionsTumor& _tumorOpt;
+    const bool _isMaxDepthFilter;
+    const SVScoreInfoTumor* _tumorInfoPtr;
+};
+
diff --git a/src/c++/lib/htsapi/CMakeLists.txt b/src/c++/lib/htsapi/CMakeLists.txt
new file mode 100644
index 0000000..9511b67
--- /dev/null
+++ b/src/c++/lib/htsapi/CMakeLists.txt
@@ -0,0 +1,28 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+###############################################################################
+##
+## Configuration file for the c++/lib/htsapi subfolder
+##
+## author Come Raczy
+##
+################################################################################
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/htsapi/SimpleAlignment_bam_util.cpp b/src/c++/lib/htsapi/SimpleAlignment_bam_util.cpp
new file mode 100644
index 0000000..0bc31e6
--- /dev/null
+++ b/src/c++/lib/htsapi/SimpleAlignment_bam_util.cpp
@@ -0,0 +1,65 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+///
+///
+
+#include "SimpleAlignment_bam_util.hh"
+#include "htsapi/align_path_bam_util.hh"
+
+
+
+void
+getAlignment(
+    const bam_record& bamRead,
+    SimpleAlignment& al)
+{
+    // al.pos is bamRead.pos() shifted down by one (presumably 1-based to 0-based -- confirm against bam_record)
+    al.tid = bamRead.target_id();
+    al.pos = bamRead.pos() - 1;
+    al.is_fwd_strand = bamRead.is_fwd_strand();
+    bam_cigar_to_apath(bamRead.raw_cigar(), bamRead.n_cigar(), al.path);
+}
+
+
+
+SimpleAlignment
+getAlignment(
+    const bam_record& bamRead)
+{
+    SimpleAlignment result;  // filled in by the two-argument overload
+    getAlignment(bamRead, result);
+    return result;
+}
+
+
+SimpleAlignment
+getFakeMateAlignment(
+    const bam_record& bamRead)
+{
+    assert(! bamRead.is_mate_unmapped());
+    SimpleAlignment mate;
+    mate.tid = bamRead.mate_target_id();
+    mate.pos = bamRead.mate_pos() - 1;
+    mate.is_fwd_strand = bamRead.is_mate_fwd_strand();
+    mate.path.emplace_back(ALIGNPATH::MATCH, bamRead.read_size()); // one match segment of this read's own length
+    return mate;
+}
diff --git a/src/c++/lib/htsapi/SimpleAlignment_bam_util.hh b/src/c++/lib/htsapi/SimpleAlignment_bam_util.hh
new file mode 100644
index 0000000..c1fb25a
--- /dev/null
+++ b/src/c++/lib/htsapi/SimpleAlignment_bam_util.hh
@@ -0,0 +1,45 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+///
+///
+
+#pragma once
+
+#include "blt_util/SimpleAlignment.hh"
+#include "htsapi/bam_record.hh"
+
+
+void
+getAlignment(
+    const bam_record& bamRead,
+    SimpleAlignment& al);
+
+SimpleAlignment
+getAlignment(
+    const bam_record& bamRead);
+
+/// generate a mate alignment, assuming same read length and perfect alignment
+SimpleAlignment
+getFakeMateAlignment(
+    const bam_record& bamRead);
+
+
diff --git a/src/c++/lib/htsapi/align_path_bam_util.cpp b/src/c++/lib/htsapi/align_path_bam_util.cpp
new file mode 100644
index 0000000..e3676e1
--- /dev/null
+++ b/src/c++/lib/htsapi/align_path_bam_util.cpp
@@ -0,0 +1,85 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "htsapi/align_path_bam_util.hh"
+
+#include <cassert>
+
+
+using namespace ALIGNPATH;
+
+
+
+void
+bam_cigar_to_apath(const uint32_t* bam_cigar,
+                   const unsigned n_cigar,
+                   path_t& apath)
+{
+    // n_cigar==0 is accepted and simply produces an empty path
+    apath.resize(n_cigar);
+    for (unsigned segmentIndex(0); segmentIndex<n_cigar; ++segmentIndex)
+    {
+        const uint32_t packedOp(bam_cigar[segmentIndex]);  // length in the bits above BAM_CIGAR_SHIFT, op code under BAM_CIGAR_MASK
+        apath[segmentIndex].length = (packedOp>>BAM_CIGAR_SHIFT);
+        apath[segmentIndex].type = static_cast<align_t>(1+(packedOp&BAM_CIGAR_MASK));
+    }
+}
+
+
+
+void
+apath_to_bam_cigar(const path_t& apath,
+                   uint32_t* bam_cigar)
+{
+    // bam_cigar must have room for apath.size() entries; they are written in path order
+    uint32_t* out(bam_cigar);
+    for (const path_segment& ps : apath)
+    {
+        assert(ps.type != NONE);
+        *out++ = (ps.length<<BAM_CIGAR_SHIFT | (static_cast<uint32_t>(ps.type)-1));
+    }
+}
+
+
+
+void
+edit_bam_cigar(const path_t& apath,
+               bam1_t& br)
+{
+    bam1_core_t& core(br.core);
+    // every cigar entry is one uint32_t, hence the factor of 4 (bytes) in the sizes below
+    const int oldOpCount(core.n_cigar);
+    const int newOpCount(apath.size());
+    const int byteDelta(4*(newOpCount-oldOpCount));
+
+    if (byteDelta != 0)
+    {
+        // change the data segment length by byteDelta at the offset where the old cigar array ends
+        const int oldCigarEnd(core.l_qname+(4*oldOpCount));
+        change_bam_data_segment_len(oldCigarEnd,byteDelta,br);
+        core.n_cigar=newOpCount;
+    }
+
+    apath_to_bam_cigar(apath,bam_get_cigar(&br));
+}
diff --git a/src/c++/lib/htsapi/align_path_bam_util.hh b/src/c++/lib/htsapi/align_path_bam_util.hh
new file mode 100644
index 0000000..f964b48
--- /dev/null
+++ b/src/c++/lib/htsapi/align_path_bam_util.hh
@@ -0,0 +1,51 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+#include "htsapi/bam_util.hh"
+
+
+/// convert internal BAM cigar representation directly into a path:
+///
+void
+bam_cigar_to_apath(const uint32_t* bam_cigar,
+                   const unsigned n_cigar,
+                   ALIGNPATH::path_t& apath);
+
+/// convert apath to internal BAM cigar representation:
+///
+/// bam_cigar should already be set to apath.size() capacity
+///
+void
+apath_to_bam_cigar(const ALIGNPATH::path_t& apath,
+                   uint32_t* bam_cigar);
+
+/// convert apath into a CIGAR string and replace CIGAR in BAM record
+///
+void
+edit_bam_cigar(const ALIGNPATH::path_t& apath,
+               bam1_t& br);
+
diff --git a/src/c++/lib/htsapi/bam_dumper.cpp b/src/c++/lib/htsapi/bam_dumper.cpp
new file mode 100644
index 0000000..d89a243
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_dumper.cpp
@@ -0,0 +1,91 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "htsapi/bam_dumper.hh"
+
+#include <cassert>
+#include <cstdlib>
+
+#include <iostream>
+#include <sstream>
+
+
+bam_dumper::
+bam_dumper(const char* filename,
+           const bam_hdr_t& header)
+    : _hdr(&header),
+      _stream_name(filename)
+{
+    assert(nullptr != filename);
+
+    _hfp = hts_open(filename, "wb");
+    if (nullptr == _hfp)
+    {
+        std::ostringstream oss;
+        oss << "Failed to open SAM/BAM/CRAM file for writing: '" << filename << "'";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    if (sam_hdr_write(_hfp,_hdr) != 0)
+    {
+        // ~bam_dumper() does not run when the constructor throws, so the open handle must be released here
+        if (hts_close(_hfp) != 0) log_os << "Failed to close SAM/BAM/CRAM file: '" << filename << "'\n";
+        std::ostringstream oss;
+        oss << "Failed to write SAM/BAM/CRAM file header for: '" << filename << "'";
+        throw blt_exception(oss.str().c_str());
+    }
+}
+
+
+bam_dumper::
+~bam_dumper()
+{
+    if (nullptr == _hfp) return;
+
+    // a destructor has no way to return the close status to the caller:
+    // a failed close is logged and the process exits with EXIT_FAILURE
+    if (hts_close(_hfp) != 0)
+    {
+        log_os << "Failed to close SAM/BAM/CRAM file: '" << name() <<"'\n";
+        std::exit(EXIT_FAILURE);
+    }
+}
+
+
+
+void
+bam_dumper::
+put_record(const bam1_t* brec)
+{
+    const int writeStatus(sam_write1(_hfp,_hdr,brec));
+    if (writeStatus >= 0) return;
+
+    // a negative status from sam_write1 is reported to the caller as a blt_exception
+    std::ostringstream oss;
+    oss << "Failed to write new record to BAM file: '" << name() << "'";
+    throw blt_exception(oss.str().c_str());
+}
+
diff --git a/src/c++/lib/htsapi/bam_dumper.hh b/src/c++/lib/htsapi/bam_dumper.hh
new file mode 100644
index 0000000..d9703d7
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_dumper.hh
@@ -0,0 +1,52 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "sam_util.hh"
+
+#include <string>
+
+
+struct bam_dumper   // writes alignment records to a file opened with hts_open(filename, "wb")
+{
+    bam_dumper(
+        const char* filename,
+        const bam_hdr_t& header);
+    /// closes the file; a failed close is logged and terminates the process
+    ~bam_dumper();
+    /// write a single record; throws blt_exception when the write fails
+    void
+    put_record(const bam1_t* brec);
+    /// the filename this dumper was constructed with
+    const char* name() const
+    {
+        return _stream_name.c_str();
+    }
+    // NOTE(review): the implicit copy operations would duplicate the owning _hfp handle -- confirm instances are never copied
+private:
+    htsFile* _hfp;
+    const bam_hdr_t* _hdr; // address of the caller's header; it must outlive this object
+
+    std::string _stream_name;
+};
diff --git a/src/c++/lib/htsapi/bam_header_info.cpp b/src/c++/lib/htsapi/bam_header_info.cpp
new file mode 100644
index 0000000..21bd6fd
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_header_info.cpp
@@ -0,0 +1,53 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#include "bam_header_info.hh"
+
+#include <iostream>
+
+
+
+bam_header_info::
+bam_header_info(const bam_hdr_t& header)
+{
+    for (int32_t tid(0); tid<header.n_targets; ++tid)  // one chrom_data entry and one name->index entry per header target
+    {
+        chrom_data.emplace_back(header.target_name[tid],header.target_len[tid]);
+        chrom_to_index[header.target_name[tid]] = tid;
+    }
+}
+
+
+std::ostream&
+operator<<(std::ostream& os, const bam_header_info& bhi)
+{
+    unsigned index(0);
+    // a title line followed by one line per chromosome, in chrom_data order
+    os << "chromosome_id_map:\n";
+    for (const bam_header_info::chrom_info& info : bhi.chrom_data)
+    {
+        os << "index: " << index << " label: " << info.label << " length: " << info.length << '\n';
+        index++;
+    }
+    return os;
+}
diff --git a/src/c++/lib/htsapi/bam_header_info.hh b/src/c++/lib/htsapi/bam_header_info.hh
new file mode 100644
index 0000000..3f82e86
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_header_info.hh
@@ -0,0 +1,108 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "bam_util.hh"
+
+#include "blt_util/thirdparty_push.h"
+
+#include "boost/serialization/string.hpp"
+#include "boost/serialization/vector.hpp"
+#include "boost/serialization/map.hpp"
+
+#include "blt_util/thirdparty_pop.h"
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+#include <map>
+
+
+/// minimal c++ bam header info
+///
+/// this class replicates the minimum information
+/// from the bam header required to parse regions
+/// (i.e. chr1:20-30). It is friendlier to mem management
+/// and serialization than using the samtools struct
+///
+struct bam_header_info
+{
+    /// construct an empty header (no chromosomes)
+    bam_header_info() {}
+
+    /// copy the target names and lengths out of an htslib header
+    explicit
+    bam_header_info(const bam_hdr_t& header);
+
+    /// equal when both list the same chromosomes (label and length) in the
+    /// same order; chrom_to_index is not compared
+    bool
+    operator==(const bam_header_info& rhs) const
+    {
+        return (chrom_data == rhs.chrom_data);
+    }
+
+    /// boost::serialization hook: archives chrom_data, then chrom_to_index
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& chrom_data;
+        ar& chrom_to_index;
+    }
+
+    /// label and length of a single chromosome
+    struct chrom_info
+    {
+        chrom_info(
+            const char* init_label = NULL,
+            const unsigned init_length = 0)
+            : label(init_label ? init_label : "") // NULL is stored as ""
+            , length(init_length)
+        {}
+
+        bool
+        operator==(const chrom_info& rhs) const
+        {
+            return ((length == rhs.length) && (label == rhs.label));
+        }
+
+        /// boost::serialization hook: archives label, then length
+        template<class Archive>
+        void serialize(Archive& ar, const unsigned /* version */)
+        {
+            ar& label;
+            ar& length;
+        }
+
+        std::string label;
+        unsigned length;
+    };
+
+    std::vector<chrom_info> chrom_data; ///< chromosomes in header order
+    std::map<std::string, int32_t> chrom_to_index; ///< label -> index into chrom_data
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const bam_header_info& bhi);
diff --git a/src/c++/lib/htsapi/bam_header_util.cpp b/src/c++/lib/htsapi/bam_header_util.cpp
new file mode 100644
index 0000000..13feea5
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_header_util.cpp
@@ -0,0 +1,217 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "htsapi/bam_header_util.hh"
+#include "blt_util/blt_exception.hh"
+#include "blt_util/parse_util.hh"
+#include "blt_util/string_util.hh"
+
+#include <cassert>
+#include <cstring>
+
+#include <algorithm>
+#include <limits>
+#include <sstream>
+
+
+
+void
+parse_bam_region(
+    const char* region,
+    std::string& chrom,
+    int32_t& begin_pos,
+    int32_t& end_pos)
+{
+    // Parse "chrom" or "chrom:begin-end" into chrom/begin_pos/end_pos.
+    // begin_pos is the parsed begin minus one; end_pos is the parsed end. If no
+    // begin-end pair follows the last ':', the whole string is the contig name
+    // and the range is 0 to int32 max. Throws blt_exception on a bad region.
+    assert(nullptr != region);
+
+    // make first split, on the last ':' in region:
+    static const char region_sep1(':');
+    const char* afterChrom(strrchr(region,region_sep1));
+    if (nullptr != afterChrom)
+    {
+        chrom=std::string(region,afterChrom);
+        afterChrom++;
+    }
+
+    bool isWholeChrom(nullptr == afterChrom);
+
+    if (! isWholeChrom)
+    {
+        // make second split
+        static const char region_sep2('-');
+        std::vector<std::string> words2;
+        split_string(afterChrom,region_sep2,words2);
+
+        if (words2.empty() || (words2.size() > 2))
+        {
+            std::ostringstream oss;
+            oss << "ERROR: can't parse begin and end positions from bam_region '" << region << "'\n";
+            throw blt_exception(oss.str().c_str());
+        }
+
+        if (words2.size() == 2)
+        {
+            begin_pos = (illumina::blt_util::parse_int_str(words2[0]))-1;
+            end_pos = (illumina::blt_util::parse_int_str(words2[1]));
+        }
+        else
+        {
+            // this exception allows for chrom names with colons (HLA...) but no positions included
+            isWholeChrom=true;
+        }
+    }
+
+    if (isWholeChrom)
+    {
+        chrom=region;
+        begin_pos = 0;
+        end_pos = std::numeric_limits<int32_t>::max();
+    }
+
+    if (chrom.empty())
+    {
+        std::ostringstream oss;
+        oss << "ERROR: can't parse contig name from bam_region '" << region << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    if ((begin_pos<0) || (end_pos<0) || (end_pos<=begin_pos))
+    {
+        std::ostringstream oss;
+        oss << "ERROR: nonsensical begin (" << begin_pos << ") and end (" << end_pos << ") positions parsed from bam_region '" << region << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+}
+
+
+
+void
+parse_bam_region_from_hdr(
+    const bam_hdr_t* header,
+    const char* region,
+    int32_t& tid,
+    int32_t& begin_pos,
+    int32_t& end_pos)
+{
+    assert(nullptr != header);
+    assert(nullptr != region);
+
+    std::string contigName;
+    parse_bam_region(region,contigName,begin_pos,end_pos);
+    // resolve the contig name to a target index via the htslib header
+    tid = bam_name2id(const_cast<bam_hdr_t*>(header),contigName.c_str());
+    if (tid < 0)
+    {
+        std::ostringstream msg;
+        msg << "ERROR: contig '" << contigName << "' from bam_region '" << region << "' not found in BAM/CRAM header\n";
+        throw blt_exception(msg.str().c_str());
+    }
+
+    const int32_t contigLength(static_cast<int32_t>(header->target_len[tid]));
+    if (contigLength < end_pos) end_pos = contigLength; // clip range to contig end
+}
+
+
+
+void
+parse_bam_region(
+    const bam_header_info& header,
+    const char* region,
+    int32_t& tid,
+    int32_t& begin_pos,
+    int32_t& end_pos)
+{
+    assert(nullptr != region);
+
+    std::string contigName;
+    parse_bam_region(region,contigName,begin_pos,end_pos);
+
+    const auto indexIter(header.chrom_to_index.find(contigName)); // look up by label
+    if (header.chrom_to_index.end() == indexIter)
+    {
+        std::ostringstream msg;
+        msg << "ERROR: contig '" << contigName << "' from bam_region '" << region << "' not found in BAM/CRAM header\n";
+        throw blt_exception(msg.str().c_str());
+    }
+    tid = indexIter->second;
+
+    const int32_t contigLength(static_cast<int32_t>(header.chrom_data[tid].length));
+    if (contigLength < end_pos) end_pos = contigLength; // clip range to contig end
+}
+
+
+
+bool
+check_header_compatibility(
+    const bam_hdr_t& h1,
+    const bam_hdr_t& h2)
+{
+    // compatible headers list the same targets (name and length) in the same order
+    const int32_t targetCount(h1.n_targets);
+    if (targetCount != h2.n_targets) return false;
+
+    for (int32_t tid(0); tid<targetCount; ++tid)
+    {
+        if (h1.target_len[tid] != h2.target_len[tid]) return false;
+        const bool isSameName(0 == strcmp(h1.target_name[tid],h2.target_name[tid]));
+        if (! isSameName) return false;
+    }
+    return true;
+}
+
+
+
+std::string
+get_bam_header_sample_name(
+    const bam_hdr_t& header,
+    const char* default_sample_name)
+{
+    // Return the value of the first "SM:" field found on an @RG header line,
+    // or default_sample_name if the header text has no such field.
+    assert(nullptr != default_sample_name);
+    if (nullptr == header.text) return default_sample_name;
+
+    static const std::string prefix("SM:");
+    std::vector<std::string> lines;
+    std::vector<std::string> words;
+    split_string(header.text,'\n',lines);
+    for (const auto& line : lines)
+    {
+        split_string(line,'\t',words);
+        if (words.empty() || (words.front() != "@RG")) continue;
+
+        for (const auto& word : words)
+        {
+            // compare() is bounded by word's length, so fields shorter than the
+            // prefix cannot be read past their end (unlike 3-iterator std::mismatch)
+            if (0 != word.compare(0,prefix.size(),prefix)) continue;
+            return word.substr(prefix.size());
+        }
+    }
+    return default_sample_name;
+}
diff --git a/src/c++/lib/htsapi/bam_header_util.hh b/src/c++/lib/htsapi/bam_header_util.hh
new file mode 100644
index 0000000..8cc22b4
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_header_util.hh
@@ -0,0 +1,81 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+/// \brief bam header manipulation functions
+///
+
+#pragma once
+
+#include "bam_util.hh"
+#include "bam_header_info.hh"
+
+#include <string>
+
+
+/// parse a bam region into chrom/begin/end values
+///
+void
+parse_bam_region(
+    const char* region,
+    std::string& chrom,
+    int32_t& begin_pos,
+    int32_t& end_pos);
+
+
+/// parse a bam region into chrom-index/begin/end values based
+/// on chromosome index lookup and end positions in bam header
+///
+void
+parse_bam_region_from_hdr(
+    const bam_hdr_t* header,
+    const char* region,
+    int32_t& tid,
+    int32_t& begin_pos,
+    int32_t& end_pos);
+
+
+void
+parse_bam_region(
+    const bam_header_info& header,
+    const char* region,
+    int32_t& tid,
+    int32_t& begin_pos,
+    int32_t& end_pos);
+
+
+/// return true only if the headers refer to the same
+/// reference sequences in the same order.
+///
+bool
+check_header_compatibility(
+    const bam_hdr_t& h1,
+    const bam_hdr_t& h2);
+
+
+/// try to determine the sample_name from the BAM/CRAM header
+/// if none found return default string value
+std::string
+get_bam_header_sample_name(
+    const bam_hdr_t& header,
+    const char* default_sample_name = "SAMPLE");
diff --git a/src/c++/lib/htsapi/bam_record.cpp b/src/c++/lib/htsapi/bam_record.cpp
new file mode 100644
index 0000000..8e2591f
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_record.cpp
@@ -0,0 +1,130 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/blt_exception.hh"
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/bam_record.hh"
+
+#include <iostream>
+#include <sstream>
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const bam_record& br)
+{
+    // one-line summary of the record; an empty record prints as "NONE"
+    if (br.empty())
+    {
+        os << "NONE";
+        return os;
+    }
+
+    os << br.qname() << "/" << br.read_no()
+       << " tid:pos:strand " << br.target_id() << ":" << (br.pos()-1) << ":" << (br.is_fwd_strand() ? '+' : '-');
+
+    ALIGNPATH::path_t cigarPath;
+    bam_cigar_to_apath(br.raw_cigar(),br.n_cigar(),cigarPath);
+    os << " cigar: " << cigarPath;
+
+    // print SA tag if present:
+    static const char satag[] = {'S','A'};
+    const char* const saValue(br.get_string_tag(satag));
+    if (nullptr != saValue)
+    {
+        os << " sa: " << saValue;
+    }
+    if (br.is_secondary())
+    {
+        os << " issec";
+    }
+    if (br.is_supplement())
+    {
+        os << " issupp";
+    }
+
+    if (br.is_paired())
+    {
+        os << " mate_tid:pos:strand " << br.mate_target_id() << ":" << (br.mate_pos()-1) << ":" << (br.is_mate_fwd_strand() ? '+' : '-');
+    }
+    return os;
+}
+
+
+
+unsigned
+bam_record::
+alt_map_qual(const char* tag) const
+{
+    // fall back to MAPQ unless the tag is present and stored with an
+    // integer type code
+    uint8_t* tagData(bam_aux_get(_bp,tag));
+    if (NULL == tagData) return map_qual();
+    if (! is_int_code(tagData[0])) return map_qual();
+
+    const int tagValue(bam_aux2i(tagData));
+    if (tagValue >= 0)
+    {
+        return static_cast<unsigned>(tagValue);
+    }
+
+    // negative values are rejected
+    std::ostringstream msg;
+    msg << "ERROR: Unexpected negative value in optional BAM/CRAM tag: '" << std::string(tag,2) << "'\n";
+    throw blt_exception(msg.str().c_str());
+}
+
+
+
+const char*
+bam_record::
+get_string_tag(const char* tag) const
+{
+    // look the tag up in the record's aux data
+    const uint8_t* const auxField(bam_aux_get(_bp, tag));
+
+    // only a field that is present and typed 'Z' (null-terminated string) qualifies
+    const bool isStringField((nullptr != auxField) && ('Z' == auxField[0]));
+    if (! isStringField) return nullptr;
+
+    // the string value starts right after the one-byte type code
+    return reinterpret_cast<const char*>(auxField+1);
+}
+
+
+
+bool
+bam_record::
+get_num_tag(const char* tag, int32_t& num) const
+{
+    // num is only written when the tag exists and has an integer type code
+    uint8_t* const auxField(bam_aux_get(_bp, tag));
+    const bool isIntField((nullptr != auxField) && is_int_code(auxField[0]));
+
+    if (isIntField)
+    {
+        num = bam_aux2i(auxField);
+    }
+    return isIntField;
+}
diff --git a/src/c++/lib/htsapi/bam_record.hh b/src/c++/lib/htsapi/bam_record.hh
new file mode 100644
index 0000000..c95aa23
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_record.hh
@@ -0,0 +1,372 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "bam_util.hh"
+#include "bam_seq.hh"
+
+#include <iosfwd>
+
+
+struct bam_record // wrapper that owns one heap-allocated htslib bam1_t record (_bp)
+{
+    bam_record()
+        : _bp(bam_init1()) {} // start from a freshly initialized record
+
+    ~bam_record()
+    {
+        freeBam();
+    }
+
+    bam_record(const bam_record& br)
+        : _bp(br.empty() ? bam_init1() : bam_dup1(br._bp)) {} // duplicate br's record unless br is empty
+
+    const bam_record&
+    operator=(const bam_record& br)
+    {
+        if (this == &br) return (*this); // self-assignment: nothing to do
+
+        if (empty())
+        {
+            if (! br.empty())
+            {
+                freeBam(); // drop our empty record before taking a duplicate of br's
+                _bp=bam_dup1(br._bp);
+            }
+            // else empty->empty : do nothing...
+        }
+        else
+        {
+            if (! br.empty())
+            {
+                bam_copy1(_bp,br._bp); // copy into the record this object already holds
+            }
+            else
+            {
+                freeBam(); // br is empty: swap our record for a freshly initialized one
+                _bp=bam_init1();
+            }
+        }
+        return (*this);
+    }
+
+private:
+    const bam_record&
+    operator==(const bam_record& rhs); // private, no definition in this header: presumably blocks record comparison
+public:
+
+    const char*
+    qname() const
+    {
+        return reinterpret_cast<const char*>(_bp->data); // the name is read from the start of the data buffer
+    }
+
+    void
+    set_qname(const char* name)
+    {
+        edit_bam_qname(name,*_bp);
+    }
+
+    // flag queries: each tests a single BAM_FLAG bit in core.flag
+    bool is_paired() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::PAIRED) != 0);
+    }
+    bool is_proper_pair() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::PROPER_PAIR) != 0);
+    }
+    bool is_unmapped() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::UNMAPPED) != 0);
+    }
+    bool is_mate_unmapped() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::MATE_UNMAPPED) != 0);
+    }
+    bool is_fwd_strand() const
+    {
+        return (! ((_bp->core.flag & BAM_FLAG::STRAND) != 0)); // forward == STRAND bit not set
+    }
+    bool is_mate_fwd_strand() const
+    {
+        return (! ((_bp->core.flag & BAM_FLAG::MATE_STRAND) != 0)); // forward == MATE_STRAND bit not set
+    }
+    bool is_dup() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::DUPLICATE) != 0);
+    }
+    bool is_filter() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::FILTER) != 0);
+    }
+    bool is_first() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::FIRST_READ) != 0);
+    }
+    bool is_second() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::SECOND_READ) != 0);
+    }
+    bool is_secondary() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::SECONDARY) != 0);
+    }
+    bool is_supplement() const
+    {
+        return ((_bp->core.flag & BAM_FLAG::SUPPLEMENT) != 0);
+    }
+
+    // flag toggles: each XORs a single BAM_FLAG bit, flipping its current state
+    void toggle_is_paired()
+    {
+        _bp->core.flag ^= BAM_FLAG::PAIRED;
+    }
+    void toggle_is_unmapped()
+    {
+        _bp->core.flag ^= BAM_FLAG::UNMAPPED;
+    }
+    void toggle_is_mate_unmapped()
+    {
+        _bp->core.flag ^= BAM_FLAG::MATE_UNMAPPED;
+    }
+    void toggle_is_fwd_strand()
+    {
+        _bp->core.flag ^= BAM_FLAG::STRAND;
+    }
+    void toggle_is_mate_fwd_strand()
+    {
+        _bp->core.flag ^= BAM_FLAG::MATE_STRAND;
+    }
+    void toggle_is_first()
+    {
+        _bp->core.flag ^= BAM_FLAG::FIRST_READ;
+    }
+    void toggle_is_second()
+    {
+        _bp->core.flag ^= BAM_FLAG::SECOND_READ;
+    }
+    void toggle_is_secondary()
+    {
+        _bp->core.flag ^= BAM_FLAG::SECONDARY;
+    }
+
+    int read_no() const
+    {
+        return  ( (is_second() && (! is_first())) ? 2 : 1 ); // 2 only when flagged second and not first
+    }
+
+    int target_id() const
+    {
+        return _bp->core.tid;
+    }
+
+    int mate_target_id() const
+    {
+        return _bp->core.mtid;
+    }
+
+    bool is_chimeric() const
+    {
+        return ((target_id()!=mate_target_id()) && (target_id()>=0) && (mate_target_id()>=0)); // different, non-negative target ids
+    }
+
+    int pos() const
+    {
+        return (_bp->core.pos+1); // raw core.pos shifted by +1 (1-based, assuming htslib's 0-based core.pos)
+    }
+
+    int mate_pos() const
+    {
+        return (_bp->core.mpos+1); // raw core.mpos shifted by +1, same convention as pos()
+    }
+
+    uint8_t map_qual() const
+    {
+        return _bp->core.qual;
+    }
+
+    /// does read contain the new SA split-read tag?
+    bool
+    isSASplit() const
+    {
+        static const char satag[] = {'S','A'}; // two-char tag, deliberately without a null terminator
+        return (nullptr != get_string_tag(satag));
+    }
+
+    /// test if the read is supplemental by more liberal criteria
+    ///
+    /// this generalizes the bam flag for supplemental to work
+    /// correctly with the workaround typified by the bwamem '-M'
+    /// option, which uses the 'secondary' flag
+    bool
+    isNonStrictSupplement() const
+    {
+        if (is_supplement()) return true;
+        if (! is_secondary()) return false;
+        return isSASplit(); // secondary reads count only when they also carry an SA tag
+    }
+
+
+    /// return single read mapping score if it exists,
+    /// else return MAPQ:
+    unsigned se_map_qual() const
+    {
+        static const char smtag[] = {'S','M'};
+        return alt_map_qual(smtag);
+    }
+
+    int32_t template_size() const
+    {
+        return _bp->core.isize;
+    }
+
+
+    /// Test if SM and AM fields both exist and are equal to zero. Any
+    /// other result returns false:
+    ///
+    bool
+    is_unanchored() const
+    {
+        if (! is_paired()) return false; // unpaired reads are never reported as unanchored
+        static const char amtag[] = {'A','M'};
+        uint8_t* am_ptr(bam_aux_get(_bp,amtag));
+        if (NULL == am_ptr)  return false;
+        static const char smtag[] = {'S','M'};
+        uint8_t* sm_ptr(bam_aux_get(_bp,smtag));
+        if (NULL == sm_ptr)  return false;
+        return (is_int_code(am_ptr[0]) && // both tags must be integer-typed...
+                is_int_code(sm_ptr[0]) &&
+                (0 == bam_aux2i(am_ptr)) && // ...and both must be zero
+                (0 == bam_aux2i(sm_ptr)));
+    }
+
+    const uint32_t* raw_cigar() const
+    {
+        return bam_get_cigar(_bp);
+    }
+    unsigned n_cigar() const
+    {
+        return _bp->core.n_cigar;
+    }
+
+    unsigned read_size() const
+    {
+        return _bp->core.l_qseq;
+    }
+
+    bam_seq get_bam_read() const
+    {
+        return bam_seq(bam_get_seq(_bp),read_size());
+    }
+
+    /// get string AUX field, return NULL if field is not found, or field is not a string
+    ///
+    /// \param[in] tag AUX field tag. This is a char array of length two, null term is not required
+    ///
+    /// example tag: static const char smtag[] = {'S','M'};
+    ///
+    const char* get_string_tag(const char* tag) const;
+
+    bool get_num_tag(const char* tag, int32_t& num) const; ///< false (num untouched) if tag is absent or not an integer
+
+    const uint8_t* qual() const
+    {
+        return bam_get_qual(_bp);
+    }
+
+    void
+    set_target_id(int32_t tid)
+    {
+        if (tid<-1) tid=-1; // clamp anything below -1 to -1
+        _bp->core.tid=tid;
+    }
+
+    // read should be null terminated, qual should already have offset removed:
+    //
+    void
+    set_readqual(const char* read,
+                 const uint8_t* init_qual)
+    {
+        edit_bam_read_and_quality(read,init_qual,*_bp);
+    }
+
+    bam1_t*
+    get_data()
+    {
+        return _bp;
+    }
+
+    const bam1_t*
+    get_data() const
+    {
+        return _bp;
+    }
+
+    bool
+    empty() const
+    {
+        assert(NULL != _bp);
+        return (_bp->l_data == 0); // empty means the record holds no data bytes
+    }
+
+private:
+    friend struct bam_streamer;
+
+    unsigned alt_map_qual(const char* tag) const; // integer value of tag if present, else MAPQ
+
+    static
+    bool
+    is_int_code(char c) // true for the aux type codes c/s/i/C/S/I
+    {
+        switch (c)
+        {
+        case 'c' :
+        case 's' :
+        case 'i' :
+        case 'C' :
+        case 'S' :
+        case 'I' :
+            return true;
+        default  :
+            return false;
+        }
+    }
+
+    void
+    freeBam() // frees the record and its data buffer; _bp is NOT reset, so callers must reassign it
+    {
+        if (NULL != _bp)
+        {
+            if (NULL != _bp->data) free(_bp->data);
+            free(_bp);
+        }
+    }
+
+    bam1_t* _bp; // owned record, released by freeBam()
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const bam_record& br);
+
diff --git a/src/c++/lib/htsapi/bam_record_util.cpp b/src/c++/lib/htsapi/bam_record_util.cpp
new file mode 100644
index 0000000..f24bb84
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_record_util.cpp
@@ -0,0 +1,141 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+///
+///
+
+#include "bam_record_util.hh"
+#include "align_path_bam_util.hh"
+
+
+bool
+is_mapped_pair(
+    const bam_record& bam_read)
+{
+    // paired, with neither this read nor its mate flagged as unmapped
+    const bool isAnyUnmapped(bam_read.is_unmapped() || bam_read.is_mate_unmapped());
+    return (bam_read.is_paired() && (! isAnyUnmapped));
+}
+
+
+
+bool
+is_mapped_chrom_pair(
+    const bam_record& bam_read)
+{
+    // a mapped pair whose read and mate share the same target id
+    const bool isSameTarget(bam_read.target_id() == bam_read.mate_target_id());
+    return (is_mapped_pair(bam_read) && isSameTarget);
+}
+
+
+
+// note this is designed to return true for the common case
+// of pos == mate_pos occurring for short FFPE fragments
+//
+bool
+is_innie_pair(
+    const bam_record& bam_read)
+{
+    if (! is_mapped_chrom_pair(bam_read)) return false;
+
+    // the two reads must be on opposite strands:
+    const bool isFwd(bam_read.is_fwd_strand());
+    if (isFwd == bam_read.is_mate_fwd_strand()) return false;
+
+    // the leftmost read must be the forward-strand one; equal positions
+    // are accepted with either read forward:
+    const int readPos(bam_read.pos());
+    const int matePos(bam_read.mate_pos());
+    if (readPos < matePos) return isFwd;
+    if (readPos > matePos) return (! isFwd);
+    return true;
+}
+
+
+
+bool
+is_possible_adapter_pair(
+    const bam_record& bamRead)
+{
+    if (! is_mapped_chrom_pair(bamRead)) return false;
+
+    const bool isFwd(bamRead.is_fwd_strand());
+    if (isFwd == bamRead.is_mate_fwd_strand()) return false;
+
+    // mate start relative to read start, sign-flipped for reverse-strand
+    // reads (range of alignment before matching softclip):
+    const int rawDiff(bamRead.mate_pos()-bamRead.pos());
+    const int posDiff(isFwd ? rawDiff : -rawDiff);
+    return ((posDiff > -50) && (posDiff < 10));
+}
+
+
+
+bool
+is_overlapping_pair(
+    const bam_record& bamRead,
+    const SimpleAlignment& matchedAlignment)
+{
+    if (! is_mapped_chrom_pair(bamRead)) return false;
+
+    const bool isFwd(bamRead.is_fwd_strand());
+    if (isFwd == bamRead.is_mate_fwd_strand()) return false;
+
+    // NOTE: a minimum gap (dupDist=50) between pos and mate-pos, beyond which
+    // the pair would be treated as a duplicate pair instead of an overlapping
+    // standard mate pair, is sketched below but currently disabled:
+    //
+    //   fwd: if (((matchedAlignment.pos+1)-bamRead.mate_pos()) >= dupDist) return false;
+    //   rev: if ((bamRead.mate_pos()-(matchedAlignment.pos+1)) >= dupDist) return false;
+
+    // test the mate position against the range of the alignment after
+    // matching all softclip:
+    if (isFwd)
+    {
+        // forward read: its matched alignment must reach the mate position
+        const pos_t matchedEnd(matchedAlignment.pos + apath_ref_length(matchedAlignment.path));
+        return (matchedEnd >= bamRead.mate_pos());
+    }
+
+    // reverse read: its matched alignment must begin at or before the mate position
+    const pos_t matchedBegin(matchedAlignment.pos);
+    return (matchedBegin <= bamRead.mate_pos());
+}
+
+
+
+unsigned
+get_avg_quality(
+    const bam_record& bam_read)
+{
+    const unsigned readLength(bam_read.read_size());
+    if (0 == readLength) return 0;
+
+    // sum the basecall qualities over the whole read
+    unsigned qualTotal(0);
+    const uint8_t* const qualBegin(bam_read.qual());
+    for (const uint8_t* q(qualBegin); q != (qualBegin+readLength); ++q)
+    {
+        qualTotal += *q;
+    }
+    return (qualTotal/readLength); // integer division: the fractional part is dropped
+}
diff --git a/src/c++/lib/htsapi/bam_record_util.hh b/src/c++/lib/htsapi/bam_record_util.hh
new file mode 100644
index 0000000..5562e1f
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_record_util.hh
@@ -0,0 +1,98 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+///
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+#include "blt_util/SimpleAlignment.hh"
+#include "htsapi/bam_record.hh"
+
+
+/// is this read part of a pair where both members are mapped?
+bool
+is_mapped_pair(
+    const bam_record& bam_read);
+
+
+/// is this read part of a pair where both members are mapped to the same chrom?
+///
+bool
+is_mapped_chrom_pair(
+    const bam_record& bam_read);
+
+/// is this read part of mapped pair with 'Innie' orientation?
+///
+/// Note this does not test MAPQ or fragment size, but could
+/// be used as the core of a 'proper-pair' predicate
+bool
+is_innie_pair(
+    const bam_record& bam_read);
+
+/// detect cases where paired-end reads overlap in such a way as to suggest a possible unfiltered
+/// read into adapter sequence (assuming innie pairs)
+bool
+is_possible_adapter_pair(
+    const bam_record& bamRead);
+
+/// detect cases where paired-end reads overlap (i.e. a fragment shorter than the combined read-length).
+///
+/// note this is an approximation because it's based on a single bam record, an
+/// exact answer would require both records in the pair. In practice, this should
+/// be good enough.
+bool
+is_overlapping_pair(
+    const bam_record& bam_read,
+    const SimpleAlignment& matchedAlignment);
+
+/// return average basecall qscore for this read
+unsigned
+get_avg_quality(
+    const bam_record& bam_read);
+
+/// select 'first' read in pair such that you
+/// consistently get only one read per-pair
+/// (assuming the bam file is properly formatted)
+inline
+bool
+isFirstRead(
+    const bam_record& bamRead)
+{
+    // the leftmost read wins; a position tie goes to the read flagged as first
+    const int matePos(bamRead.mate_pos());
+    return ((bamRead.pos() < matePos) || ((bamRead.pos() == matePos) && bamRead.is_first()));
+}
+
+/// get BAM RG tag, return an empty string "" if no RG tag exists:
+inline
+const char*
+getReadGroup(
+    const bam_record& bamRead)
+{
+    static const char rgTag[] = {'R','G'};
+    static const char defaultRG[] = "";
+    // get_string_tag returns NULL when RG is absent or is not a string field
+    const char* const rgValue(bamRead.get_string_tag(rgTag));
+    if (NULL != rgValue) return rgValue;
+    return defaultRG;
+}
diff --git a/src/c++/lib/htsapi/bam_seq.cpp b/src/c++/lib/htsapi/bam_seq.cpp
new file mode 100644
index 0000000..bf78950
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_seq.cpp
@@ -0,0 +1,42 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+
+/// \author Chris Saunders
+///
+
+#include "htsapi/bam_seq.hh"
+
+#include <iostream>
+
+
+std::ostream&
+operator<<(std::ostream& os,
+           const bam_seq_base& bs)
+{
+    // stream the sequence one base character at a time
+    const unsigned baseCount(bs.size());
+    for (unsigned baseIndex(0); baseIndex != baseCount; ++baseIndex)
+    {
+        os << bs.get_char(baseIndex);
+    }
+    return os;
+}
diff --git a/src/c++/lib/htsapi/bam_seq.hh b/src/c++/lib/htsapi/bam_seq.hh
new file mode 100644
index 0000000..9190a13
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_seq.hh
@@ -0,0 +1,316 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+#include "blt_util/PolymorphicObject.hh"
+#include "blt_util/reference_contig_segment.hh"
+#include "blt_util/seq_util.hh"
+
+#include <cassert>
+
+#include <algorithm>
+#include <iosfwd>
+#include <string>
+
+
+namespace BAM_BASE // base codes as used by the bam_seq conversion functions below
+{
+enum index_t
+{
+    REF = 0x0, // shown as '=' by get_bam_seq_char
+    A = 0x1,
+    C = 0x2,
+    G = 0x4,
+    T = 0x8,
+    ANY = 0xF // get_bam_seq_code result for every unrecognized character
+};
+}
+
+
+/// Translate a BAM base code into its display character.
+///
+/// \param a base code, compared against the BAM_BASE::index_t values
+///
+/// \return '=' for REF, 'A'/'C'/'G'/'T' for the matching single-base code,
+///         and 'N' for every other value (including ANY)
+inline
+char
+get_bam_seq_char(const uint8_t a)
+{
+    // REF has no base letter of its own:
+    if (a == BAM_BASE::REF) return '=';
+
+    // the four single-base codes:
+    if (a == BAM_BASE::A) return 'A';
+    if (a == BAM_BASE::C) return 'C';
+    if (a == BAM_BASE::G) return 'G';
+    if (a == BAM_BASE::T) return 'T';
+
+    // ANY and all remaining values share one letter:
+    return 'N';
+}
+
+
+/// Translate a BAM base code into the character of its complement base.
+///
+/// \param a base code, compared against the BAM_BASE::index_t values
+///
+/// \return '=' for REF, the complement letter for A/C/G/T (T/G/C/A), and
+///         'N' for every other value (including ANY)
+inline
+char
+get_bam_seq_complement_char(const uint8_t a)
+{
+    // REF is passed through unchanged:
+    if (a == BAM_BASE::REF) return '=';
+
+    // A <-> T and C <-> G:
+    if (a == BAM_BASE::A) return 'T';
+    if (a == BAM_BASE::T) return 'A';
+    if (a == BAM_BASE::C) return 'G';
+    if (a == BAM_BASE::G) return 'C';
+
+    // ANY and all remaining values share one letter:
+    return 'N';
+}
+
+
+/// Translate a sequence character into its BAM base code.
+///
+/// \param c sequence character; matching is case-sensitive
+///
+/// \return REF for '=', the single-base code for 'A'/'C'/'G'/'T', and ANY
+///         for every other character (lower-case letters included)
+inline
+uint8_t
+get_bam_seq_code(const char c)
+{
+    // '=' is the only non-letter with a dedicated code:
+    if ('=' == c) return BAM_BASE::REF;
+
+    // upper-case base letters:
+    if ('A' == c) return BAM_BASE::A;
+    if ('C' == c) return BAM_BASE::C;
+    if ('G' == c) return BAM_BASE::G;
+    if ('T' == c) return BAM_BASE::T;
+
+    // everything else, e.g. 'N':
+    return BAM_BASE::ANY;
+}
+
+
+/// Map a BAM base code to a compact base id: A=0, C=1, G=2, T=3, ANY=4.
+///
+/// \param a base code to translate
+/// \param ref code translated in place of \p a when \p a is REF; that
+///        second lookup runs with the default ref of ANY
+///
+/// \return the id in [0,4]; a value not listed in BAM_BASE::index_t is
+///         handed to base_error, and yields 4 if that call returns
+inline
+uint8_t
+bam_seq_code_to_id(const uint8_t a,
+                   const uint8_t ref = BAM_BASE::ANY)
+{
+    // REF stands in for another base code, so translate that one instead:
+    if (a == BAM_BASE::REF) return bam_seq_code_to_id(ref);
+
+    // single-base codes:
+    if (a == BAM_BASE::A) return 0;
+    if (a == BAM_BASE::C) return 1;
+    if (a == BAM_BASE::G) return 2;
+    if (a == BAM_BASE::T) return 3;
+
+    // every value other than ANY is unexpected at this point:
+    if (a != BAM_BASE::ANY) base_error("bam_seq_code_to_id",a);
+    return 4;
+}
+
+
+// Common read-only interface over a base sequence, so that compressed
+// sequences from bam files and regular strings can be passed as the same
+// object type:
+struct bam_seq_base : public PolymorphicObject
+{
+    /// \return base code at position i (see BAM_BASE::index_t)
+    virtual uint8_t get_code(pos_t i) const = 0;
+    /// \return base character at position i
+    virtual char get_char(const pos_t i) const = 0;
+    /// \return sequence length as reported by the implementation
+    virtual unsigned size() const = 0;
+
+protected:
+    /// \return true if i lies in [0,size())
+    bool is_in_range(const pos_t i) const
+    {
+        return (! ((i<0) || (i>=static_cast<pos_t>(size()))));
+    }
+};
+
+std::ostream& operator<<(std::ostream& os, const bam_seq_base& bs);
+
+
+// Read-only view of a packed BAM sequence: 4-bit base codes stored two per
+// byte, high nibble first. The data at _s is neither owned nor copied.
+struct bam_seq : public bam_seq_base
+{
+    /// \param s packed sequence data, must outlive this object
+    /// \param init_size number of bases in the view
+    /// \param offset index (in bases) of the first base of the view within s
+    ///
+    /// NOTE(review): both counts are uint16_t, so larger values are silently
+    /// truncated at the call site -- confirm that callers stay below 65536
+    bam_seq(const uint8_t* s,
+            const uint16_t init_size,
+            const uint16_t offset=0)
+        : _s(s), _size(init_size), _offset(offset) {}
+
+    /// \return 4-bit code of base i, or BAM_BASE::ANY if i is out of range
+    uint8_t
+    get_code(pos_t i) const override
+    {
+        if (! is_in_range(i)) return BAM_BASE::ANY;
+        // even packed index -> high nibble (shift 4), odd -> low nibble (shift 0)
+        const pos_t packedIndex(i + static_cast<pos_t>(_offset));
+        const int nibbleShift((0 == (packedIndex%2)) ? 4 : 0);
+        return ((_s[packedIndex/2] >> nibbleShift) & 0xf);
+    }
+
+    /// \return character of base i ('N' if i is out of range)
+    char
+    get_char(const pos_t i) const override
+    {
+        return get_bam_seq_char(get_code(i));
+    }
+
+    /// \return complement character of base i ('N' if i is out of range)
+    char
+    get_complement_char(const pos_t i) const
+    {
+        return get_bam_seq_complement_char(get_code(i));
+    }
+
+    /// \return the whole view as a string
+    std::string
+    get_string() const
+    {
+        std::string bases;
+        bases.reserve(_size);
+        for (unsigned i(0); i<_size; ++i) bases.push_back(get_char(i));
+        return bases;
+    }
+
+    /// \return the reverse complement of the whole view as a string
+    std::string
+    get_rc_string() const
+    {
+        // filled back to front, which takes the place of a reverse pass
+        std::string bases(_size,'N');
+        for (unsigned i(0); i<_size; ++i)
+        {
+            bases[_size-1-i] = get_complement_char(i);
+        }
+        return bases;
+    }
+
+    unsigned size() const override
+    {
+        return _size;
+    }
+
+private:
+    const uint8_t* _s; // packed base codes, not owned
+    uint16_t _size; // number of bases in the view
+    uint16_t _offset; // base index of the view start within _s
+};
+
+
+// bam_seq_base view of a plain character string. Only a pointer is
+// stored, so the viewed characters must outlive this object.
+struct string_bam_seq : public bam_seq_base
+{
+    /// view all of \p s
+    explicit
+    string_bam_seq(const std::string& s)
+        : _s(s.c_str()), _size(s.size()) {}
+
+    /// view the first \p init_size characters at \p s
+    string_bam_seq(const char* s,
+                   const unsigned init_size)
+        : _s(s), _size(init_size) {}
+
+    uint8_t
+    get_code(pos_t i) const override
+    {
+        const char base(get_char(i));
+        return get_bam_seq_code(base);
+    }
+
+    /// \return character i, or 'N' if i is out of range
+    char
+    get_char(const pos_t i) const override
+    {
+        return (is_in_range(i) ? _s[i] : 'N');
+    }
+
+    unsigned size() const override { return _size; }
+
+private:
+    const char* _s;
+    unsigned _size;
+};
+
+
+// bam_seq_base adapter over a reference_contig_segment, which is held by
+// reference and therefore must outlive this object.
+struct rc_segment_bam_seq : public bam_seq_base
+{
+    explicit
+    rc_segment_bam_seq(const reference_contig_segment& r)
+        : _r(r) {}
+
+    /// \return code of the base at i, derived from get_char(i)
+    uint8_t
+    get_code(pos_t i) const override
+    {
+        const char base(get_char(i));
+        return get_bam_seq_code(base);
+    }
+
+    /// \return _r.get_base(i); no range check is added at this level
+    char
+    get_char(const pos_t i) const override
+    {
+        return _r.get_base(i);
+    }
+
+    /// \return _r.end(), converted to unsigned
+    unsigned size() const override { return _r.end(); }
+
+private:
+    const reference_contig_segment& _r;
+};
+
diff --git a/src/c++/lib/htsapi/bam_seq_read_util.cpp b/src/c++/lib/htsapi/bam_seq_read_util.cpp
new file mode 100644
index 0000000..0189f66
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_seq_read_util.cpp
@@ -0,0 +1,69 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#include "bam_seq_read_util.hh"
+
+
+
+/// Count the 'N' characters at the end of \p bseq.
+/// \param[out] end_skip length of the trailing run of 'N'
+static
+void
+get_read_align_strand_end_skip(const bam_seq& bseq,
+                               unsigned& end_skip)
+{
+    const unsigned readSize(bseq.size());
+    unsigned trailingN(0);
+    while ((trailingN<readSize) && ('N'==bseq.get_char(readSize-1-trailingN)))
+    {
+        trailingN++;
+    }
+    end_skip=trailingN;
+}
+
+
+
+/// Measure the run of 'N' on one end of \p bseq, chosen by strand: the
+/// trailing run goes to \p end_skip if \p is_fwd_strand, otherwise the
+/// leading run goes to \p begin_skip. The other output is set to 0.
+void
+get_read_fwd_strand_skip(const bam_seq& bseq,
+                         const bool is_fwd_strand,
+                         unsigned& begin_skip,
+                         unsigned& end_skip)
+{
+    begin_skip=0;
+    if (is_fwd_strand)
+    {
+        get_read_align_strand_end_skip(bseq,end_skip);
+        return;
+    }
+
+    end_skip=0;
+    for (const unsigned bsize(bseq.size()); begin_skip<bsize; ++begin_skip)
+    {
+        if (bseq.get_char(begin_skip)!='N') break;
+    }
+}
diff --git a/src/c++/lib/htsapi/bam_seq_read_util.hh b/src/c++/lib/htsapi/bam_seq_read_util.hh
new file mode 100644
index 0000000..06b085f
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_seq_read_util.hh
@@ -0,0 +1,35 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// \file
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "htsapi/bam_seq.hh"
+
+
+void
+get_read_fwd_strand_skip(const bam_seq& bseq,
+                         const bool is_fwd_strand,
+                         unsigned& begin_skip,
+                         unsigned& end_skip);
diff --git a/src/c++/lib/htsapi/bam_streamer.cpp b/src/c++/lib/htsapi/bam_streamer.cpp
new file mode 100644
index 0000000..7640e02
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_streamer.cpp
@@ -0,0 +1,297 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "htsapi/bam_header_util.hh"
+#include "htsapi/bam_streamer.hh"
+
+#include <cassert>
+#include <cstdlib>
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+
+
+/// Open \p filename for streaming; a non-null \p region restricts the stream
+/// via set_new_region. Throws blt_exception on failure, first releasing any
+/// htslib handles because no destructor runs for a throwing constructor.
+bam_streamer::
+bam_streamer(
+    const char* filename,
+    const char* region)
+    : _is_record_set(false),
+      _hfp(nullptr),
+      _hdr(nullptr),
+      _hidx(nullptr),
+      _hitr(nullptr),
+      _record_no(0),
+      _stream_name(filename),
+      _is_region(false)
+{
+    assert(nullptr != filename);
+    if ('\0' == *filename)
+    {
+        throw blt_exception("Can't initialize bam_streamer with empty filename\n");
+    }
+
+    _hfp = hts_open(filename, "rb");
+    if (nullptr == _hfp)
+    {
+        std::ostringstream oss;
+        oss << "Failed to open SAM/BAM/CRAM file for reading: '" << name() << "'";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    try
+    {
+        _hdr = sam_hdr_read(_hfp);
+        if (nullptr == _hdr)
+        {
+            std::ostringstream oss;
+            oss << "Failed to parse header from SAM/BAM/CRAM file: " << name();
+            throw blt_exception(oss.str().c_str());
+        }
+
+        // region stream, or whole file: for the latter look up a fake contig
+        // name (tid ignored) so that header->hash is created
+        if (nullptr != region) set_new_region(region);
+        else if (_hdr->n_targets) target_name_to_id("fake_name");
+    }
+    catch (...)
+    {
+        if (nullptr != _hitr) hts_itr_destroy(_hitr);
+        if (nullptr != _hidx) hts_idx_destroy(_hidx);
+        if (nullptr != _hdr) bam_hdr_destroy(_hdr);
+        hts_close(_hfp);
+        throw;
+    }
+}
+
+
+
+/// Release the iterator, index, header and file handle, in that order.
+/// A failed hts_close is logged, after which the process exits.
+bam_streamer::
+~bam_streamer()
+{
+    if (nullptr != _hitr) hts_itr_destroy(_hitr);
+    if (nullptr != _hidx) hts_idx_destroy(_hidx);
+    if (nullptr != _hdr) bam_hdr_destroy(_hdr);
+
+    // nothing to close, or closed cleanly:
+    if (nullptr == _hfp) return;
+    if (0 == hts_close(_hfp)) return;
+
+    log_os << "ERROR: Failed to close SAM/BAM/CRAM file: '" << name() << "'\n";
+    std::exit(EXIT_FAILURE);
+}
+
+
+
+/// \return true if \p filename can be opened for reading as an ifstream
+static
+bool
+fexists(const char* filename)
+{
+    return (! std::ifstream(filename).fail());
+}
+
+
+
+/// \return true if \p fullString ends with \p ending (always true for an empty ending)
+static
+bool
+hasEnding(
+    const std::string& fullString,
+    const std::string& ending)
+{
+    const std::string::size_type fullLen(fullString.length()), endLen(ending.length());
+    return ((fullLen >= endLen) && (0 == fullString.compare(fullLen - endLen, endLen, ending)));
+}
+
+
+
+// Load the index for this stream into _hidx, unless it is already loaded.
+// Throws blt_exception if sam_index_load cannot provide an index.
+void
+bam_streamer::
+_load_index()
+{
+    /// TODO: Find out whether _hidx can be destroyed after the HTS
+    /// iterator is created, in which case this could be a local
+    /// variable. Until we know, _hidx should persist for the lifetime
+    /// of _hitr
+    if (nullptr != _hidx) return;
+
+    std::string index_base(name());
+
+    // hack to allow GATK/Picard bai name convention: when no index file is
+    // found next to the full file name, drop a ".bam" suffix from the name
+    // handed to sam_index_load
+    static const std::string bamext(".bam");
+    const bool isIndexFound(fexists((index_base+".bai").c_str()) ||
+                            fexists((index_base+".csi").c_str()) ||
+                            fexists((index_base+".crai").c_str()));
+    if ((! isIndexFound) && hasEnding(index_base,bamext))
+    {
+        index_base.erase(index_base.length()-bamext.length());
+    }
+
+    _hidx = sam_index_load(_hfp, index_base.c_str());
+    if (nullptr != _hidx) return;
+
+    std::ostringstream oss;
+    oss << "BAM/CRAM index is not available for file: " << name();
+    throw blt_exception(oss.str().c_str());
+}
+
+
+
+/// Stream from \p region (string form); a failing fetch is logged, then rethrown.
+void
+bam_streamer::
+set_new_region(const char* region)
+{
+    int32_t contigIndex,regionBegin,regionEnd;
+    parse_bam_region_from_hdr(_hdr, region, contigIndex, regionBegin, regionEnd);
+    try
+    {
+        set_new_region(contigIndex,regionBegin,regionEnd);
+        _region=region; // only reached when the fetch above succeeded
+    }
+    catch (const std::exception&)
+    {
+        log_os << "ERROR: exception while fetching BAM/CRAM region: '" << region
+               << "' from file '" << name() << "'\n";
+        throw;
+    }
+}
+
+
+
+/// Point the stream at zero-indexed range beg-end of contig index ref; throws blt_exception on failure.
+void
+bam_streamer::
+set_new_region(const int ref, const int beg, const int end)
+{
+    // reset together with the destroy so that a throw below cannot leave a dangling iterator
+    if (nullptr != _hitr) hts_itr_destroy(_hitr);
+    _hitr = nullptr;
+    _load_index();
+    if (ref < 0)
+    {
+        std::ostringstream oss;
+        oss << "Invalid region (contig index: " << ref << ") specified for BAM/CRAM file: " << name();
+        throw blt_exception(oss.str().c_str());
+    }
+
+    _hitr = sam_itr_queryi(_hidx,ref,beg,end);
+    if (_hitr == nullptr)
+    {
+        std::ostringstream oss;
+        oss << "Failed to fetch region: #" << ref << ":" << beg << "-" << end << " specified for BAM/CRAM file: " << name();
+        throw blt_exception(oss.str().c_str());
+    }
+    _is_region = true;
+    _region.clear();
+    _is_record_set = false;
+    _record_no = 0;
+}
+
+
+/// Read the next record (region iterator if one is set, else sequential read).
+/// \return false at end of data; throws blt_exception on an htslib read error
+bool
+bam_streamer::
+next()
+{
+    if (nullptr == _hfp) return false;
+
+    const int ret((nullptr == _hitr)
+                  ? sam_read1(_hfp,_hdr, _brec._bp)
+                  : sam_itr_next(_hfp, _hitr, _brec._bp));
+    _is_record_set=(ret >= 0);
+    if (_is_record_set) _record_no++;
+    if (ret < -1) // -1 is the regular end of data, lower values are read failures
+    {
+        std::ostringstream oss;
+        oss << "Unexpected htslib read error (return value " << ret << ") in SAM/BAM/CRAM file: " << name();
+        throw blt_exception(oss.str().c_str());
+    }
+    return _is_record_set;
+}
+
+
+
+/// \return the header's contig name for \p tid, or "*" for a negative
+///         (unmapped) tid
+const char*
+bam_streamer::
+target_id_to_name(const int32_t tid) const
+{
+    static const char unmapped[] = "*";
+    if (tid<0) return unmapped;
+
+    assert(tid < _hdr->n_targets); // target_name holds n_targets entries
+    return _hdr->target_name[tid];
+}
+
+
+
+int32_t
+bam_streamer::
+target_name_to_id(const char* seq_name) const
+{
+    return bam_name2id(_hdr,seq_name); // htslib lookup of seq_name in this stream's header
+}
+
+
+
+/// Write a tab-indented, multi-line description of the stream (label,
+/// selected region if any, and the current record if one is set) to \p os.
+void
+bam_streamer::
+report_state(std::ostream& os) const
+{
+    os << "\tbam_stream_label: " << name() << "\n";
+    if (_is_region && (! _region.empty()))
+    {
+        os << "\tbam_stream_selected_region: " << _region << "\n";
+    }
+
+    const bam_record* const bamp(get_record_ptr());
+    if (nullptr == bamp)
+    {
+        os << "\tno bam record currently set\n";
+        return;
+    }
+
+    os << "\tbam_stream_record_no: " << record_no() << "\n";
+    os << "\tbam_record QNAME/read_number: " << bamp->qname() << "/" << bamp->read_no() << "\n";
+    const char* chrom_name(target_id_to_name(bamp->target_id()));
+    os << "\tbam record RNAME: " << chrom_name << "\n";
+    os << "\tbam record POS: " << bamp->pos() << "\n";
+}
diff --git a/src/c++/lib/htsapi/bam_streamer.hh b/src/c++/lib/htsapi/bam_streamer.hh
new file mode 100644
index 0000000..080be5b
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_streamer.hh
@@ -0,0 +1,118 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "htsapi/bam_record.hh"
+#include "htsapi/sam_util.hh"
+
+#include "boost/utility.hpp"
+
+#include <string>
+
+
+/// Stream bam records from CRAM/BAM/SAM files. For CRAM/BAM
+/// files you can run an indexed stream from a specific genome region.
+///
+//
+// Example use:
+// while (stream.next()) {
+//     const bam_record& read(*(stream.get_record_ptr()));
+//     if(read.is_unmapped()) foo++;
+// }
+//
+struct bam_streamer : public boost::noncopyable
+{
+    /// \param filename CRAM/BAM/SAM input file
+    /// \param region if filename is indexed CRAM or BAM, you can
+    ///        restrict the stream to a specific region
+    explicit
+    bam_streamer(const char* filename,
+                 const char* region = nullptr);
+
+    ~bam_streamer();
+
+    /// \brief set new or first region for file:
+    ///
+    /// \param region if ctor filename is indexed CRAM or BAM, you can
+    ///        restrict the stream to a specific region
+    void
+    set_new_region(const char* region);
+
+    /// \brief set new or first region for file:
+    ///
+    /// \param ref contig index, must not be negative
+    /// \param beg zero-indexed start pos
+    /// \param end zero-indexed end pos
+    void
+    set_new_region(int ref, int beg, int end);
+
+    bool next();
+
+    /// \return the current record, or nullptr if no record is set
+    const bam_record* get_record_ptr() const
+    {
+        return (_is_record_set ? &_brec : nullptr);
+    }
+
+    /// \return the filename given to the ctor
+    const char* name() const
+    {
+        return _stream_name.c_str();
+    }
+
+    /// \return count of records read since the last region change
+    unsigned record_no() const
+    {
+        return _record_no;
+    }
+
+    void report_state(std::ostream& os) const;
+
+    const char*
+    target_id_to_name(const int32_t tid) const;
+
+    int32_t target_name_to_id(const char* seq_name) const;
+
+    const bam_hdr_t& get_header() const
+    {
+        return *(_hdr);
+    }
+
+private:
+    void _load_index();
+
+    bool _is_record_set;
+    htsFile* _hfp;
+    bam_hdr_t* _hdr;
+    hts_idx_t* _hidx;
+    hts_itr_t* _hitr;
+    bam_record _brec;
+
+    // track for debug only:
+    unsigned _record_no;
+    std::string _stream_name;
+    bool _is_region;
+    std::string _region;
+};
diff --git a/src/c++/lib/htsapi/bam_util.cpp b/src/c++/lib/htsapi/bam_util.cpp
new file mode 100644
index 0000000..ea46767
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_util.cpp
@@ -0,0 +1,177 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "bam_seq.hh"
+#include "bam_util.hh"
+#include "blt_util/log.hh"
+
+#include <cassert>
+
+#include <iostream>
+
+
+
+/// Set the data length of \p br to \p new_len, growing the buffer (to the
+/// kroundup32 of new_len) when needed; exits the process if realloc fails.
+static
+void
+change_bam_data_len(const int new_len,
+                    bam1_t& br)
+{
+    assert(new_len>=0);
+    const bool isGrowNeeded(new_len > br.m_data);
+    br.l_data = new_len;
+    if (! isGrowNeeded) return;
+
+    br.m_data = new_len;
+    kroundup32(br.m_data);
+    br.data = static_cast<uint8_t*>(realloc(br.data,br.m_data));
+    if (nullptr != br.data) return;
+
+    log_os << "ERROR: failed to realloc BAM data size to: " << new_len << "\n";
+    exit(EXIT_FAILURE);
+}
+
+
+
+/// Grow or shrink (by \p delta bytes) the data segment ending at byte
+/// offset \p end, moving all data behind the segment along with it.
+void
+change_bam_data_segment_len(const int end,
+                            const int delta,
+                            bam1_t& br)
+{
+    assert(end>=0);
+    if (0==delta) return;
+
+    // bytes behind the segment, measured before the length changes:
+    const int tail_size(br.l_data-end);
+    assert(tail_size>=0);
+    change_bam_data_len(br.l_data+delta,br);
+
+    // the resize may reallocate br.data, so form the pointers only now:
+    uint8_t* const old_tail_ptr(br.data+end);
+    if (0!=tail_size) memmove(old_tail_ptr+delta,old_tail_ptr,tail_size);
+}
+
+
+
+/// Replace the qname of \p br with \p name, resizing the record data as
+/// needed. Exits the process if the name plus its terminating NUL takes
+/// more than 255 bytes.
+void
+edit_bam_qname(const char* name,
+               bam1_t& br)
+{
+    // the stored length includes the terminating NUL:
+    const uint32_t nameBytes(strlen(name)+1);
+    if (nameBytes > 0xff)
+    {
+        log_os << "ERROR: name is too long to be entered in BAM qname field: " << name << "\n";
+        exit(EXIT_FAILURE);
+    }
+
+    const uint8_t oldBytes(br.core.l_qname);
+    const int delta(static_cast<int>(nameBytes)-oldBytes);
+    if (0 != delta)
+    {
+        change_bam_data_segment_len(oldBytes,delta,br);
+        br.core.l_qname=static_cast<uint8_t>(nameBytes);
+    }
+    strcpy(bam_get_qname(&br),name);
+}
+
+
+/// \return (a+1)/2 + a, the combined byte size of packed seq and qual for a bases
+static inline int seq_size(const int a)
+{
+    const int packedSeqBytes((a+1)/2);
+    return a+packedSeqBytes;
+}
+
+
+/// Replace the sequence and quality values of \p br.
+/// \param read new sequence (NUL-terminated); its length is the new read length
+/// \param qual at least strlen(read) quality bytes, copied as they are
+void
+edit_bam_read_and_quality(const char* read,
+                          const uint8_t* qual,
+                          bam1_t& br)
+{
+    const int new_len(strlen(read));
+    const int delta(seq_size(new_len)-seq_size(br.core.l_qseq));
+    if (0 != delta)
+    {
+        // the seq+qual segment ends where the aux data begins:
+        change_bam_data_segment_len(bam_get_aux(&br)-br.data,delta,br);
+    }
+    br.core.l_qseq = new_len;
+
+    // pack two 4-bit base codes per byte, first base in the high nibble:
+    uint8_t* const seqPtr(bam_get_seq(&br));
+    memset(seqPtr,0,(new_len+1)/2);
+    for (int i(0); i<new_len; ++i)
+    {
+        seqPtr[i/2] |= get_bam_seq_code(read[i]) << 4*(1-i%2);
+    }
+    memcpy(bam_get_qual(&br),qual,new_len);
+}
+
+
+
+/// Delete every occurrence of aux field \p tag from \p br.
+void
+nuke_bam_aux_field(bam1_t& br,
+                   const char* tag)
+{
+    // each pass looks up and deletes one occurrence, until none is left
+    for (uint8_t* auxPtr(bam_aux_get(&br,tag)); nullptr != auxPtr; auxPtr = bam_aux_get(&br,tag))
+    {
+        bam_aux_del(&br,auxPtr);
+    }
+}
+
+
+
+// Append unsigned value x as aux field "tag", using the smallest of the
+// types 'C' (1 byte), 'S' (2 bytes) and 'I' (4 bytes) that can hold it.
+// NOTE(review): the bytes are passed in host order -- confirm for big-endian
+void
+bam_aux_append_unsigned(bam1_t& br, const char* tag, uint32_t x)
+{
+    if (x <= 0xff)
+    {
+        uint8_t z(x);
+        bam_aux_append(&br,tag,'C',1,&z);
+    }
+    else if (x <= 0xffff)
+    {
+        uint16_t y(x);
+        bam_aux_append(&br,tag,'S',2,reinterpret_cast<uint8_t*>(&y));
+    }
+    else
+    {
+        bam_aux_append(&br,tag,'I',4,reinterpret_cast<uint8_t*>(&x));
+    }
+}
diff --git a/src/c++/lib/htsapi/bam_util.hh b/src/c++/lib/htsapi/bam_util.hh
new file mode 100644
index 0000000..5d3f65f
--- /dev/null
+++ b/src/c++/lib/htsapi/bam_util.hh
@@ -0,0 +1,156 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+/// \brief bam record manipulation functions
+///
+
+#pragma once
+
+#include "blt_util/thirdparty_push.h"
+
+extern "C" {
+#include <unistd.h> // this simplifies zlib on windows
+#define __STDC_LIMIT_MACROS
+#include "htslib/bgzf.h"
+#include "htslib/sam.h"
+}
+
+#include "blt_util/thirdparty_pop.h"
+
+/// pull two remaining functions in from samtools API:
+
+/*!
+  @abstract    Calculate the minimum bin that contains a region [beg,end).
+  @param  beg  start of the region, 0-based
+  @param  end  end of the region, 0-based
+  @return      bin, as computed by hts_reg2bin with arguments 14 and 5
+ */
+static inline int bam_reg2bin(uint32_t beg, uint32_t end)
+{
+    return hts_reg2bin(beg, end, 14, 5);
+}
+
+/*!
+  @abstract Calculate the rightmost coordinate of an alignment on the
+  reference genome.
+
+  @param  c      pointer to the bam1_core_t structure
+  @param  cigar  the corresponding CIGAR array (from bam1_t::cigar)
+  @return        the rightmost coordinate, 0-based (pos+1 if n_cigar is 0)
+*/
+static inline uint32_t bam_calend(const bam1_core_t* c, const uint32_t* cigar)
+{
+    return c->pos + (c->n_cigar? bam_cigar2rlen(c->n_cigar, cigar) : 1);
+}
+
+
+namespace BAM_FLAG // bit masks, presumably for the BAM record flag field -- confirm against users
+{
+enum index_t
+{
+    PAIRED        = 0x001,
+    PROPER_PAIR   = 0x002,
+    UNMAPPED      = 0x004,
+    MATE_UNMAPPED = 0x008,
+    STRAND        = 0x010, // presumably set for reverse-strand reads (SAM flag 0x10) -- confirm
+    MATE_STRAND   = 0x020, // presumably set for a reverse-strand mate (SAM flag 0x20) -- confirm
+    FIRST_READ    = 0x040,
+    SECOND_READ   = 0x080,
+    SECONDARY     = 0x100,
+    FILTER        = 0x200, // presumably the SAM "not passing filters" bit (0x200) -- confirm
+    DUPLICATE     = 0x400,
+    SUPPLEMENT    = 0x800
+};
+}
+
+
+/// insert new qname
+///
+void
+edit_bam_qname(const char* name,
+               bam1_t& br);
+
+/// Read length is taken from the read string. Assumes offset has
+/// already been removed from qual
+void
+edit_bam_read_and_quality(const char* read,
+                          const uint8_t* qual,
+                          bam1_t& br);
+
+/// remove all copies of optional field "tag"
+///
+void
+nuke_bam_aux_field(bam1_t& br,
+                   const char* tag);
+
+
+/// store an unsigned int to optional field "tag", optimize storage
+/// based on size of x
+void
+bam_aux_append_unsigned(bam1_t& br,
+                        const char* tag,
+                        uint32_t x);
+
+/// change the size of a subsegment of the bam data, 'end' identifies
+/// the byte offset of the end of the segment and 'delta' is the change
+/// to the segment size
+///
+void
+change_bam_data_segment_len(const int end,
+                            const int delta,
+                            bam1_t& br);
+
+
+/// Update bam record bin value -- call after updating pos and/or
+/// cigar fields.
+///
+/// The bin is chosen from pos (and the cigar, if present) rather than
+/// from the unmapped flag, because we want to index shadow reads.
+///
+/// \param br record whose core.bin value is overwritten
+///
+inline
+void
+bam_update_bin(bam1_t& br)
+{
+    bam1_core_t& brc(br.core);
+
+    if (brc.pos<0)
+    {
+        // unmapped, non-shadow reads:
+        brc.bin = 0;
+        return;
+    }
+
+    // normal case: the region ends at the alignment end given by the cigar
+    //
+    // shadow case (no cigar): the region is the single position at pos
+    // (match logic from samtools)
+    //
+    const uint32_t binEnd((brc.n_cigar!=0)
+                          ? bam_calend(&brc, bam_get_cigar(&br))
+                          : (brc.pos+1));
+
+    brc.bin = bam_reg2bin(brc.pos, binEnd);
+}
diff --git a/src/c++/lib/htsapi/bed_record.cpp b/src/c++/lib/htsapi/bed_record.cpp
new file mode 100644
index 0000000..37fccdf
--- /dev/null
+++ b/src/c++/lib/htsapi/bed_record.cpp
@@ -0,0 +1,94 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "bed_record.hh"
+#include "blt_util/parse_util.hh"
+
+#include <cassert>
+
+#include <iostream>
+
+
+
+/// Parse the first three tab-separated fields (chrom, begin, end) of one
+/// BED line.
+///
+/// \param s line text, ended by a newline or NUL; the pointer itself is
+///        stored in 'line', the text is not copied
+///
+/// \return true if three fields were found before the end of the line
+bool
+bed_record::
+set(const char* s)
+{
+    static const char sep('\t');
+    static const unsigned maxword(3);
+
+    clear();
+    line = s;
+
+    // simple tab parse: 'start' marks the first character of the field
+    // that is currently being scanned
+    const char* start(s);
+    unsigned wordindex(0);
+    for (const char* p(s); wordindex<maxword; ++p)
+    {
+        const bool isLineEnd(('\n' == *p) || ('\0' == *p));
+        if (isLineEnd || (sep == *p))
+        {
+            if (0 == wordindex)
+            {
+                chrom.assign(start,p);
+            }
+            else
+            {
+                // parse_int presumably advances 'start' past what it consumed,
+                // which the assert requires to be the whole field:
+                int& coordinate((1 == wordindex) ? begin : end);
+                coordinate=illumina::blt_util::parse_int(start);
+                assert(start==p);
+            }
+            start=p+1;
+            wordindex++;
+        }
+        if (isLineEnd) break;
+    }
+
+    return (wordindex >= maxword);
+}
+
+
+
+/// Write \p bedr as "chrom<TAB>begin<TAB>end" followed by a newline.
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const bed_record& bedr)
+{
+    static const char sep('\t');
+    os << bedr.chrom << sep << bedr.begin << sep << bedr.end << '\n';
+
+    return os;
+}
+
diff --git a/src/c++/lib/htsapi/bed_record.hh b/src/c++/lib/htsapi/bed_record.hh
new file mode 100644
index 0000000..791fd0b
--- /dev/null
+++ b/src/c++/lib/htsapi/bed_record.hh
@@ -0,0 +1,66 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/seq_util.hh"
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+
+/// Minimal BED record holding the first three columns of a BED line
+/// (chrom, begin, end) plus a pointer to the text it was parsed from.
+struct bed_record
+{
+    bed_record()
+    {
+        clear();
+    }
+
+    // set record from record string s, return false on error
+    //
+    // s is stored in 'line' as a pointer only; it is not copied
+    bool set(const char* s);
+
+    // reset every field to its empty/zero state
+    void clear()
+    {
+        chrom.clear();
+        begin=0;
+        end=0;
+        line=nullptr;
+    }
+
+    // true when begin does not exceed end
+    bool
+    is_valid() const
+    {
+        return (begin <= end);
+    }
+
+    // first column of the record
+    std::string chrom;
+    // second column of the record, parsed as an integer
+    int begin = 0;
+    // third column of the record, parsed as an integer
+    int end = 0;
+    // text most recently passed to set(); nullptr after clear()
+    const char* line = nullptr;
+};
+
+
+std::ostream& operator<<(std::ostream& os, const bed_record& bedr);
+
diff --git a/src/c++/lib/htsapi/bed_streamer.cpp b/src/c++/lib/htsapi/bed_streamer.cpp
new file mode 100644
index 0000000..8b3a770
--- /dev/null
+++ b/src/c++/lib/htsapi/bed_streamer.cpp
@@ -0,0 +1,87 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "bed_streamer.hh"
+#include "blt_util/blt_exception.hh"
+
+#include <iostream>
+#include <sstream>
+
+
+/// Advance to the next usable BED record in the current region.
+///
+/// Lines starting with '#' are skipped, as are records for which
+/// bed_record::is_valid() is false.
+///
+/// \return true if a record is now set, false if the stream has ended or
+///         the file handle / region iterator is missing
+/// \throw blt_exception if a line cannot be parsed as a BED record
+bool
+bed_streamer::
+next()
+{
+    const bool is_unreadable((nullptr==_hfp) || (nullptr==_titr));
+    if (_is_stream_end || is_unreadable) return false;
+
+    for (;;)
+    {
+        // a negative status or an empty line buffer both mean end of stream
+        const int status(tbx_itr_next(_hfp, _tidx, _titr, &_kstr));
+        _is_stream_end = ((status < 0) || (nullptr == _kstr.s));
+        _is_record_set = (! _is_stream_end);
+        if (_is_stream_end) break;
+
+        // filter out header for whole file access case:
+        if ('#' == _kstr.s[0]) continue;
+
+        ++_record_no;
+
+        const bool is_parsed(_bedrec.set(_kstr.s));
+        if (! is_parsed)
+        {
+            std::ostringstream oss;
+            oss << "ERROR: Can't parse BED record: '" << _kstr.s << "'\n";
+            throw blt_exception(oss.str().c_str());
+        }
+
+        // keep reading past records that fail the validity check
+        if (_bedrec.is_valid()) break;
+    }
+
+    return _is_record_set;
+}
+
+
+
+/// Write the stream label and, when a record is currently set, the record
+/// number and the record itself to \p os.
+void
+bed_streamer::
+report_state(std::ostream& os) const
+{
+    os << "\tbed_stream_label: " << name() << "\n";
+
+    const bed_record* const current(get_record_ptr());
+    if (nullptr == current)
+    {
+        os << "\tno bed record currently set\n";
+        return;
+    }
+
+    os << "\tbed_stream_record_no: " << record_no() << "\n";
+    os << "\tbed_record: " << *current << "\n";
+}
diff --git a/src/c++/lib/htsapi/bed_streamer.hh b/src/c++/lib/htsapi/bed_streamer.hh
new file mode 100644
index 0000000..b4eb84e
--- /dev/null
+++ b/src/c++/lib/htsapi/bed_streamer.hh
@@ -0,0 +1,54 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "bed_record.hh"
+#include "hts_streamer.hh"
+
+
+/// Streams bed_record entries on top of the file/region handling provided
+/// by hts_streamer.
+struct bed_streamer : public hts_streamer
+{
+    /// both arguments are forwarded unchanged to the hts_streamer constructor
+    bed_streamer(
+        const char* filename,
+        const char* region) :
+        hts_streamer(filename,region)
+    {}
+
+    /// advance to next record
+    ///
+    /// \return true if a record is available from get_record_ptr()
+    bool next();
+
+    /// \return pointer to the current record, or nullptr if no record is set
+    const bed_record*
+    get_record_ptr() const
+    {
+        if (_is_record_set) return &_bedrec;
+        else                return nullptr;
+    }
+
+    /// write the stream label and current record (if any) to os
+    void report_state(std::ostream& os) const;
+
+private:
+    /// storage for the most recently parsed record
+    bed_record _bedrec;
+};
diff --git a/src/c++/lib/htsapi/hts_streamer.cpp b/src/c++/lib/htsapi/hts_streamer.cpp
new file mode 100644
index 0000000..de1a082
--- /dev/null
+++ b/src/c++/lib/htsapi/hts_streamer.cpp
@@ -0,0 +1,120 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#include "hts_streamer.hh"
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+
+#include <cstdlib>
+
+#include <iostream>
+
+
+static const kstring_t kinit = {0,0,0};
+
+
+/// Open an indexed hts file and position the stream on \p region.
+///
+/// \param filename path of the file to open; must be non-null and non-empty
+/// \param region region string handed to set_region(); must be non-null
+/// \throw blt_exception if \p filename is null or empty, or \p region is null
+///
+/// Logs an error and exits the process if the file cannot be opened or its
+/// index cannot be loaded.
+hts_streamer::
+hts_streamer(
+    const char* filename,
+    const char* region) :
+    _is_record_set(false),
+    _is_stream_end(false),
+    _record_no(0),
+    // member initializers run before the null check in the body, and
+    // constructing std::string from a null pointer is undefined behavior
+    _stream_name((nullptr != filename) ? filename : ""),
+    _hfp(nullptr),
+    _tidx(nullptr),
+    _titr(nullptr),
+    _kstr(kinit)
+{
+    if (nullptr == filename)
+    {
+        throw blt_exception("hts filename is null ptr");
+    }
+
+    if (nullptr == region)
+    {
+        throw blt_exception("hts region is null ptr");
+    }
+
+    if ('\0' == *filename)
+    {
+        throw blt_exception("hts filename is empty string");
+    }
+
+    _hfp = hts_open(filename, "r");
+    if (nullptr == _hfp)
+    {
+        log_os << "ERROR: Failed to open hts file: '" << filename << "'\n";
+        exit(EXIT_FAILURE);
+    }
+
+    _load_index();
+
+    // read only a region of HTS file:
+    set_region(region);
+}
+
+
+
+/// Release the region iterator, the index, the file handle and the line
+/// buffer held by the stream.
+hts_streamer::
+~hts_streamer()
+{
+    // htslib objects are released in reverse order of acquisition
+    if (_titr != nullptr)
+    {
+        tbx_itr_destroy(_titr);
+    }
+    if (_tidx != nullptr)
+    {
+        tbx_destroy(_tidx);
+    }
+    if (_hfp != nullptr)
+    {
+        hts_close(_hfp);
+    }
+
+    // free() accepts a null pointer, so the line buffer needs no guard
+    free(_kstr.s);
+}
+
+
+/// Point the stream at a new region, replacing any existing region iterator.
+///
+/// \param region region string passed to tbx_itr_querys; must be non-null
+/// \throw blt_exception if \p region is null
+///
+/// If the query yields no iterator the stream is marked as ended; otherwise
+/// the end-of-stream flag is cleared so the new region can be read.
+void
+hts_streamer::
+set_region(
+    const char* region)
+{
+    if (nullptr == region)
+    {
+        throw blt_exception("hts region is null ptr");
+    }
+
+    // free _titr if it's already been set to avoid memory leaks
+    if (nullptr != _titr) tbx_itr_destroy(_titr);
+
+    _titr = tbx_itr_querys(_tidx, region);
+
+    // assign instead of only ever setting the flag: a stream that was
+    // exhausted on a previous region must become readable again
+    _is_stream_end = (nullptr == _titr);
+}
+
+
+/// Load the index for the stream's file unless one is already loaded.
+///
+/// Logs an error and exits the process if the index cannot be loaded.
+void
+hts_streamer::
+_load_index()
+{
+    if (nullptr == _tidx)
+    {
+        _tidx = tbx_index_load(name());
+
+        const bool is_load_failed(nullptr == _tidx);
+        if (is_load_failed)
+        {
+            log_os << "ERROR: Failed to load index for hts file: '" << name() << "'\n";
+            exit(EXIT_FAILURE);
+        }
+    }
+}
diff --git a/src/c++/lib/htsapi/hts_streamer.hh b/src/c++/lib/htsapi/hts_streamer.hh
new file mode 100644
index 0000000..d93dab8
--- /dev/null
+++ b/src/c++/lib/htsapi/hts_streamer.hh
@@ -0,0 +1,71 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "bam_util.hh"
+#include "tabix_util.hh"
+
+#include "boost/utility.hpp"
+
+#include <string>
+
+
+/// Non-copyable base holding the htslib file handle, index, region iterator
+/// and line buffer used to read records from an indexed hts file.
+struct hts_streamer : private boost::noncopyable
+{
+    /// open filename and restrict reading to region
+    hts_streamer(
+        const char* filename,
+        const char* region);
+
+    ~hts_streamer();
+
+    /// \return the filename the stream was constructed with
+    const char*
+    name() const
+    {
+        return _stream_name.c_str();
+    }
+
+    /// \return current value of the record counter
+    unsigned
+    record_no() const
+    {
+        return _record_no;
+    }
+
+    /// replace the current region iterator with one for region
+    void set_region(
+        const char* region);
+
+protected:
+    /// load the index for the file if it has not been loaded yet
+    void
+    _load_index();
+
+    // true while a record is available to the caller
+    bool _is_record_set;
+    // true once no further records can be read
+    bool _is_stream_end;
+    // record counter reported by record_no()
+    unsigned _record_no;
+    // filename given at construction
+    std::string _stream_name;
+
+    // htslib file handle
+    htsFile* _hfp;
+    // index loaded for the file
+    tbx_t* _tidx;
+    // iterator over the current region
+    hts_itr_t* _titr;
+    // line buffer; its storage is freed in the destructor
+    kstring_t _kstr;
+};
diff --git a/src/c++/lib/htsapi/sam_util.hh b/src/c++/lib/htsapi/sam_util.hh
new file mode 100644
index 0000000..8c13569
--- /dev/null
+++ b/src/c++/lib/htsapi/sam_util.hh
@@ -0,0 +1,34 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/thirdparty_push.h"
+
+extern "C" {
+#include <unistd.h> // this simplifies zlib on windows
+#include "htslib/sam.h"
+}
+
+#include "blt_util/thirdparty_pop.h"
diff --git a/src/c++/lib/htsapi/samtools_fasta_util.cpp b/src/c++/lib/htsapi/samtools_fasta_util.cpp
new file mode 100644
index 0000000..3897f80
--- /dev/null
+++ b/src/c++/lib/htsapi/samtools_fasta_util.cpp
@@ -0,0 +1,171 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Bret Barnes
+///
+
+#include "samtools_fasta_util.hh"
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/parse_util.hh"
+#include "blt_util/seq_util.hh"
+#include "blt_util/string_util.hh"
+
+extern "C"
+{
+#include "htslib/faidx.h"
+}
+
+#include <cassert>
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+
+
+/// Read every chromosome name and length from a fasta index file.
+///
+/// \param fai_file path to the fasta index
+/// \param chrom_sizes output map of chromosome name to length; cleared first
+/// \throw blt_exception if the file cannot be opened, a non-empty line has
+///        fewer than two tab-delimited fields, or a name occurs twice
+void
+get_chrom_sizes(const std::string& fai_file,
+                std::map<std::string,unsigned>& chrom_sizes)
+{
+    static const char delim('\t');
+
+    chrom_sizes.clear();
+    std::ifstream fis(fai_file.c_str());
+    if (! fis)
+    {
+        // a stream that failed to open never reaches eof, so fail up front
+        std::ostringstream oss;
+        oss << "ERROR: Can't open fai file: '" << fai_file << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    std::string line;
+    std::vector<std::string> word;
+
+    // test the read itself: checking eof() before reading processes a
+    // phantom empty line after the final record
+    while (std::getline(fis, line))
+    {
+        if (line.empty()) continue;
+
+        split_string(line, delim, word);
+
+        if (word.size() < 2)
+        {
+            std::ostringstream oss;
+            oss << "ERROR: Can't parse line '" << line << "' in fai file: '" << fai_file << "'\n";
+            throw blt_exception(oss.str().c_str());
+        }
+
+        const unsigned length(illumina::blt_util::parse_unsigned_str(word[1]));
+        const bool is_new(chrom_sizes.insert(std::make_pair(word[0],length)).second);
+        if (! is_new)
+        {
+            std::ostringstream oss;
+            oss << "ERROR: Repeated chromosome '" << word[0] << "' in fai file: '" << fai_file << "'\n";
+            throw blt_exception(oss.str().c_str());
+        }
+    }
+}
+
+
+
+/// Look up the length of one chromosome in a fasta index file.
+///
+/// \param fai_file path to the fasta index
+/// \param chrom_name chromosome to look up (first column of the index)
+/// \return the value of the second column of the first matching line
+/// \throw blt_exception if the file cannot be opened, a non-empty line
+///        before the match has fewer than two tab-delimited fields, or
+///        \p chrom_name is not found
+unsigned
+get_chrom_length(const std::string& fai_file,
+                 const std::string& chrom_name)
+{
+    static const char delim('\t');
+
+    bool isFound(false);
+    unsigned retval(0);
+    {
+        std::ifstream fis(fai_file.c_str());
+        if (! fis)
+        {
+            // a stream that failed to open never reaches eof, so fail up front
+            std::ostringstream oss;
+            oss << "ERROR: Can't open fai file: '" << fai_file << "'\n";
+            throw blt_exception(oss.str().c_str());
+        }
+
+        std::string line;
+        std::vector<std::string> word;
+
+        // test the read itself: checking eof() before reading processes a
+        // phantom empty line after the final record, which previously hid
+        // the "not found" error below behind a failed assert
+        while (std::getline(fis, line))
+        {
+            if (line.empty()) continue;
+
+            split_string(line, delim, word);
+
+            if (word.size() < 2)
+            {
+                std::ostringstream oss;
+                oss << "ERROR: Can't parse line '" << line << "' in fai file: '" << fai_file << "'\n";
+                throw blt_exception(oss.str().c_str());
+            }
+            if (word[0] != chrom_name) continue;
+            retval=illumina::blt_util::parse_unsigned_str(word[1]);
+            isFound=true;
+            break;
+        }
+    }
+
+    if (! isFound)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Unable to find chromosome '" << chrom_name << "' in fai file '" << fai_file << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+    return retval;
+}
+
+
+
+/// Fetch the reference sequence for a region string.
+///
+/// \param ref_file path to the reference fasta file
+/// \param fa_region region string passed to fai_fetch
+/// \param ref_seq output; replaced with the fetched sequence
+/// \throw blt_exception if the fasta index cannot be loaded or the region
+///        cannot be fetched
+void
+get_region_seq(const std::string& ref_file,
+               const std::string& fa_region,
+               std::string& ref_seq)
+{
+
+    faidx_t* fai(fai_load(ref_file.c_str()));
+    if (NULL == fai)
+    {
+        // never hand a null index to fai_fetch
+        std::ostringstream oss;
+        oss << "ERROR: Can't load fasta index for reference file: '" << ref_file << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    int len(0); // throwaway...
+    char* ref_tmp(fai_fetch(fai,fa_region.c_str(), &len));
+    if (NULL == ref_tmp)
+    {
+        // release the index before throwing so the error path does not leak it
+        fai_destroy(fai);
+
+        std::ostringstream oss;
+        oss << "ERROR: Can't find sequence region '" << fa_region << "' in reference file: '" << ref_file << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+    ref_seq.assign(ref_tmp);
+    free(ref_tmp);
+    fai_destroy(fai);
+}
+
+
+
+/// Fetch the reference sequence for a chromosome interval.
+///
+/// \param ref_file path to the reference fasta file
+/// \param chrom chromosome name
+/// \param begin_pos begin position, passed unchanged to faidx_fetch_seq
+/// \param end_pos end position, passed unchanged to faidx_fetch_seq
+/// \param ref_seq output; replaced with the fetched sequence
+/// \throw blt_exception if the fasta index cannot be loaded or the region
+///        cannot be fetched
+void
+get_region_seq(const std::string& ref_file,
+               const std::string& chrom,
+               const int begin_pos,
+               const int end_pos,
+               std::string& ref_seq)
+{
+    faidx_t* fai(fai_load(ref_file.c_str()));
+    if (NULL == fai)
+    {
+        // never hand a null index to faidx_fetch_seq
+        std::ostringstream oss;
+        oss << "ERROR: Can't load fasta index for reference file: '" << ref_file << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+
+    int len(0); // throwaway...
+    char* ref_tmp(faidx_fetch_seq(fai,const_cast<char*>(chrom.c_str()), begin_pos, end_pos, &len));
+    if (NULL == ref_tmp)
+    {
+        // release the index before throwing so the error path does not leak it
+        fai_destroy(fai);
+
+        std::ostringstream oss;
+        oss << "ERROR: Can't find sequence region '" << chrom << ":" << (begin_pos+1) << "-" << (end_pos+1) << "' in reference file: '" << ref_file << "'\n";
+        throw blt_exception(oss.str().c_str());
+    }
+    ref_seq.assign(ref_tmp);
+    free(ref_tmp);
+    fai_destroy(fai);
+}
+
+
+
+
+
+/// Fetch the reference sequence for a chromosome interval, then pass it
+/// through standardize_ref_seq.
+void
+get_standardized_region_seq(
+    const std::string& ref_file,
+    const std::string& chrom,
+    const int begin_pos,
+    const int end_pos,
+    std::string& ref_seq)
+{
+    get_region_seq(ref_file, chrom, begin_pos, end_pos, ref_seq);
+
+    const char* const ref_name(ref_file.c_str());
+    const char* const chrom_name(chrom.c_str());
+    standardize_ref_seq(ref_name, chrom_name, ref_seq, begin_pos);
+}
diff --git a/src/c++/lib/htsapi/samtools_fasta_util.hh b/src/c++/lib/htsapi/samtools_fasta_util.hh
new file mode 100644
index 0000000..8d89a03
--- /dev/null
+++ b/src/c++/lib/htsapi/samtools_fasta_util.hh
@@ -0,0 +1,74 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Bret Barnes
+///
+
+#pragma once
+
+#include <map>
+#include <string>
+
+
+/// retrieve a map of chromosome sizes from the fasta index
+///
+void
+get_chrom_sizes(
+    const std::string& fai_file,
+    std::map<std::string,unsigned>& chrom_sizes);
+
+
+/// retrieve size of specific chromosome from the fasta index
+///
+unsigned
+get_chrom_length(
+    const std::string& fai_file,
+    const std::string& chrom_name);
+
+
+/// get reference sequence from region
+void
+get_region_seq(
+    const std::string& ref_file,
+    const std::string& fa_region,
+    std::string& ref_seq);
+
+/// get reference sequence from decomposed region
+void
+get_region_seq(
+    const std::string& ref_file,
+    const std::string& chrom,
+    const int begin_pos,
+    const int end_pos,
+    std::string& ref_seq);
+
+
+/// get reference sequence from decomposed region and standardize the result
+///
+/// \param begin_pos begin position (zero-indexed, closed)
+/// \param end_pos end position (zero-indexed, closed)
+void
+get_standardized_region_seq(
+    const std::string& ref_file,
+    const std::string& chrom,
+    const int begin_pos,
+    const int end_pos,
+    std::string& ref_seq);
diff --git a/src/c++/lib/htsapi/tabix_util.hh b/src/c++/lib/htsapi/tabix_util.hh
new file mode 100644
index 0000000..36cd4f4
--- /dev/null
+++ b/src/c++/lib/htsapi/tabix_util.hh
@@ -0,0 +1,36 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/thirdparty_push.h"
+
+extern "C" {
+#include <unistd.h> // this simplifies zlib on windows
+#define __STDC_LIMIT_MACROS
+#include "htslib/vcf.h"
+#include "htslib/tbx.h"
+}
+
+#include "blt_util/thirdparty_pop.h"
diff --git a/src/c++/lib/htsapi/test/CMakeLists.txt b/src/c++/lib/htsapi/test/CMakeLists.txt
new file mode 100644
index 0000000..83c64d7
--- /dev/null
+++ b/src/c++/lib/htsapi/test/CMakeLists.txt
@@ -0,0 +1,28 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+include(${THIS_CXX_TEST_LIBRARY_CMAKE})
diff --git a/src/c++/lib/htsapi/test/align_path_bam_util_test.cpp b/src/c++/lib/htsapi/test/align_path_bam_util_test.cpp
new file mode 100644
index 0000000..d1d8d3b
--- /dev/null
+++ b/src/c++/lib/htsapi/test/align_path_bam_util_test.cpp
@@ -0,0 +1,49 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/bam_record.hh"
+
+#include "boost/test/unit_test.hpp"
+
+
+
+BOOST_AUTO_TEST_SUITE( test_align_path_bam_util )
+
+
+// Round trip: CIGAR string -> alignment path -> bam record -> alignment
+// path -> CIGAR string must reproduce the input.
+BOOST_AUTO_TEST_CASE( test_edit_bam_cigar )
+{
+    const std::string expectedCigar("10M10D1I10M");
+
+    ALIGNPATH::path_t pathIn;
+    cigar_to_apath(expectedCigar.c_str(),pathIn);
+
+    // write the path into a fresh bam record
+    bam_record record;
+    bam1_t* const recordData(record.get_data());
+    edit_bam_cigar(pathIn,*recordData);
+
+    // read it back out of the record
+    ALIGNPATH::path_t pathOut;
+    bam_cigar_to_apath(record.raw_cigar(),record.n_cigar(),pathOut);
+
+    BOOST_REQUIRE_EQUAL(apath_to_cigar(pathOut),expectedCigar);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/htsapi/test/bam_header_util_test.cpp b/src/c++/lib/htsapi/test/bam_header_util_test.cpp
new file mode 100644
index 0000000..d66be4a
--- /dev/null
+++ b/src/c++/lib/htsapi/test/bam_header_util_test.cpp
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "htsapi/bam_header_util.hh"
+
+#include "boost/test/unit_test.hpp"
+
+
+
+BOOST_AUTO_TEST_SUITE( test_bam_header_util )
+
+// Region parsing must cope with contig names that themselves contain ':'.
+BOOST_AUTO_TEST_CASE( test_parse_bam_region )
+{
+    std::string contig;
+    int32_t beginPos;
+    int32_t endPos;
+
+    // contig name followed by an explicit range
+    parse_bam_region("HLA-A*01:01:01:02N:1-3291",contig,beginPos,endPos);
+
+    BOOST_REQUIRE_EQUAL(contig, "HLA-A*01:01:01:02N");
+    BOOST_REQUIRE_EQUAL(beginPos, 0);
+    BOOST_REQUIRE_EQUAL(endPos, 3291);
+
+    // contig name only
+    parse_bam_region("HLA-A*01:01:01:02N",contig,beginPos,endPos);
+
+    BOOST_REQUIRE_EQUAL(contig, "HLA-A*01:01:01:02N");
+    BOOST_REQUIRE_EQUAL(beginPos, 0);
+    BOOST_REQUIRE(endPos > 1000000000);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/htsapi/test/bed_streamer_test.bed.gz b/src/c++/lib/htsapi/test/bed_streamer_test.bed.gz
new file mode 100644
index 0000000..60b82b2
Binary files /dev/null and b/src/c++/lib/htsapi/test/bed_streamer_test.bed.gz differ
diff --git a/src/c++/lib/htsapi/test/bed_streamer_test.bed.gz.tbi b/src/c++/lib/htsapi/test/bed_streamer_test.bed.gz.tbi
new file mode 100644
index 0000000..b538ef3
Binary files /dev/null and b/src/c++/lib/htsapi/test/bed_streamer_test.bed.gz.tbi differ
diff --git a/src/c++/lib/htsapi/test/bed_streamer_test.cpp b/src/c++/lib/htsapi/test/bed_streamer_test.cpp
new file mode 100644
index 0000000..ee00a0d
--- /dev/null
+++ b/src/c++/lib/htsapi/test/bed_streamer_test.cpp
@@ -0,0 +1,70 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "test_config.h"
+
+#include "htsapi/bed_streamer.hh"
+
+#include "boost/test/unit_test.hpp"
+
+
+
+BOOST_AUTO_TEST_SUITE( test_bed_streamer )
+
+// Path of the BED test file inside the test data directory.
+static
+const char*
+getTestpath()
+{
+    // adjacent string literals are joined at compile time; the array has
+    // static storage, so the returned pointer stays valid after the call
+    static const char testPath[] = TEST_DATA_PATH "/bed_streamer_test.bed.gz";
+    return testPath;
+}
+
+
+// Stream a region of the test BED file and check the two records it holds.
+BOOST_AUTO_TEST_CASE( test_bed_streamer_region )
+{
+    bed_streamer beds(getTestpath(),"chr1:750000-822000");
+
+    const bed_record* bedr(nullptr);
+
+    BOOST_REQUIRE( beds.next() );
+    bedr = beds.get_record_ptr();
+    // BOOST_REQUIRE instead of assert: it stays active in NDEBUG builds and
+    // reports through the test framework before the pointer is dereferenced
+    BOOST_REQUIRE( bedr != nullptr );
+
+    BOOST_REQUIRE( bedr->is_valid() );
+
+    BOOST_REQUIRE_EQUAL(bedr->chrom, "chr1");
+    BOOST_REQUIRE_EQUAL(bedr->begin, 750000);
+    BOOST_REQUIRE_EQUAL(bedr->end, 750001);
+
+    BOOST_REQUIRE( beds.next() );
+    bedr = beds.get_record_ptr();
+    BOOST_REQUIRE( bedr != nullptr );
+
+    BOOST_REQUIRE( bedr->is_valid() );
+    BOOST_REQUIRE_EQUAL(bedr->chrom, "chr1");
+    BOOST_REQUIRE_EQUAL(bedr->begin, 800000);
+    BOOST_REQUIRE_EQUAL(bedr->end, 800001);
+
+    // the region holds no further records
+    BOOST_REQUIRE(! beds.next() );
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/htsapi/test/test_config.h.in b/src/c++/lib/htsapi/test/test_config.h.in
new file mode 100644
index 0000000..c686c6c
--- /dev/null
+++ b/src/c++/lib/htsapi/test/test_config.h.in
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+#define TEST_DATA_PATH "@CMAKE_CURRENT_SOURCE_DIR@"
diff --git a/src/c++/lib/htsapi/test/test_main.cpp b/src/c++/lib/htsapi/test/test_main.cpp
new file mode 100644
index 0000000..6cf9c45
--- /dev/null
+++ b/src/c++/lib/htsapi/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libhtsapi
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/c++/lib/htsapi/test/vcf_streamer_test.cpp b/src/c++/lib/htsapi/test/vcf_streamer_test.cpp
new file mode 100644
index 0000000..15c1f79
--- /dev/null
+++ b/src/c++/lib/htsapi/test/vcf_streamer_test.cpp
@@ -0,0 +1,106 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "test_config.h"
+
+#include "htsapi/vcf_streamer.hh"
+
+#include "boost/test/unit_test.hpp"
+
+
+
+BOOST_AUTO_TEST_SUITE( test_vcf_streamer )
+
+// Path of the VCF test file inside the test data directory.
+static
+const char*
+getTestpath()
+{
+    // adjacent string literals are joined at compile time; the array has
+    // static storage, so the returned pointer stays valid after the call
+    static const char testPath[] = TEST_DATA_PATH "/vcf_streamer_test.vcf.gz";
+    return testPath;
+}
+
+
+// Stream a region of the test VCF file and check the three records it holds.
+BOOST_AUTO_TEST_CASE( test_vcf_streamer_region )
+{
+    vcf_streamer vcfs(getTestpath(),"chr1:750000-822000");
+
+    const vcf_record* vptr(nullptr);
+
+    BOOST_REQUIRE( vcfs.next() );
+    vptr = vcfs.get_record_ptr();
+    // BOOST_REQUIRE instead of assert: it stays active in NDEBUG builds and
+    // reports through the test framework before the pointer is dereferenced
+    BOOST_REQUIRE( vptr != nullptr );
+
+    BOOST_REQUIRE( vptr->is_valid() );
+    BOOST_REQUIRE( vptr->is_indel() );
+    BOOST_REQUIRE( ! vptr->is_snv() );
+
+    BOOST_REQUIRE_EQUAL(vptr->pos, 757807);
+    BOOST_REQUIRE_EQUAL(vptr->ref,"CCCTGGCCAGCAGATCCACCCTGTCTATACTACCTG");
+
+    BOOST_REQUIRE( vcfs.next() );
+    vptr = vcfs.get_record_ptr();
+    BOOST_REQUIRE( vptr != nullptr );
+
+    BOOST_REQUIRE( vptr->is_valid() );
+    BOOST_REQUIRE( ! vptr->is_indel() );
+    BOOST_REQUIRE( vptr->is_snv() );
+    BOOST_REQUIRE_EQUAL(vptr->pos, 758807);
+    BOOST_REQUIRE_EQUAL(vptr->alt.size(),2u);
+    BOOST_REQUIRE_EQUAL(vptr->alt[0],"T");
+
+    BOOST_REQUIRE( vcfs.next() );
+    vptr = vcfs.get_record_ptr();
+    BOOST_REQUIRE( vptr != nullptr );
+
+    BOOST_REQUIRE( vptr->is_valid() );
+    BOOST_REQUIRE( vptr->is_indel() );
+    BOOST_REQUIRE( ! vptr->is_snv() );
+    BOOST_REQUIRE_EQUAL(vptr->pos, 821604);
+    BOOST_REQUIRE_EQUAL(vptr->alt.size(),1u);
+    BOOST_REQUIRE_EQUAL(vptr->alt[0],"TGCCCTTTGGCAGAGCAGGTGTGCTGTGCTG");
+
+    // once the region is exhausted no record may remain set
+    BOOST_REQUIRE( ! vcfs.next() );
+    vptr = vcfs.get_record_ptr();
+    BOOST_REQUIRE( vptr == nullptr );
+}
+
+// Disabled test of whole-file (no region) access.
+// NOTE(review): it constructs vcf_streamer from a filename alone, whereas the
+// enabled test above always passes a region string; presumably the
+// single-argument form is no longer supported -- confirm against
+// vcf_streamer.hh before re-enabling.
+#if 0
+BOOST_AUTO_TEST_CASE( test_vcf_streamer_noregion )
+{
+    vcf_streamer vcfs(getTestpath());
+
+    const vcf_record* vptr(nullptr);
+
+    BOOST_REQUIRE( vcfs.next() );
+    vptr = vcfs.get_record_ptr();
+    assert(vptr != nullptr);
+
+    BOOST_REQUIRE( vptr->is_valid() );
+    BOOST_REQUIRE( vptr->is_indel() );
+    BOOST_REQUIRE( ! vptr->is_snv() );
+
+    BOOST_REQUIRE_EQUAL(vptr->pos, 54712);
+
+    BOOST_REQUIRE( vcfs.next() );
+}
+#endif
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/htsapi/test/vcf_streamer_test.vcf.gz b/src/c++/lib/htsapi/test/vcf_streamer_test.vcf.gz
new file mode 100644
index 0000000..21770f9
Binary files /dev/null and b/src/c++/lib/htsapi/test/vcf_streamer_test.vcf.gz differ
diff --git a/src/c++/lib/htsapi/test/vcf_streamer_test.vcf.gz.tbi b/src/c++/lib/htsapi/test/vcf_streamer_test.vcf.gz.tbi
new file mode 100644
index 0000000..993ab64
Binary files /dev/null and b/src/c++/lib/htsapi/test/vcf_streamer_test.vcf.gz.tbi differ
diff --git a/src/c++/lib/htsapi/vcf_record.cpp b/src/c++/lib/htsapi/vcf_record.cpp
new file mode 100644
index 0000000..8701576
--- /dev/null
+++ b/src/c++/lib/htsapi/vcf_record.cpp
@@ -0,0 +1,201 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "vcf_record.hh"
+#include "blt_util/parse_util.hh"
+
+#include <cassert>
+#include <cctype>
+
+#include <algorithm>
+#include <iostream>
+
+
+
+/// functor which upper-cases a single character in place
+struct convert
+{
+    void operator()(char& c) const
+    {
+        // go through unsigned char so that a negative char value is never
+        // handed to toupper
+        const unsigned char asUnsigned(static_cast<unsigned char>(c));
+        c = toupper(asUnsigned);
+    }
+};
+
+
+/// upper-case every character of s in place
+static
+void
+stoupper(std::string& s)
+{
+    for (char& c : s)
+    {
+        convert()(c);
+    }
+}
+
+
+
+/// Parse the first five tab-separated columns (CHROM, POS, ID, REF, ALT) of a
+/// VCF record line into this object.
+///
+/// \param s record text; parsing stops at the first newline or NUL. The
+///          pointer itself is stored in 'line' (the text is not copied).
+/// \return true if all five columns were found, false if the line ended early
+bool
+vcf_record::
+set(const char* s)
+{
+    static const char sep('\t');
+    static const unsigned maxword(5);
+
+    clear();
+
+    line = s;
+
+    // simple tab parse:
+    // 'start' marks the beginning of the current column, 'p' scans forward
+    // to the character which terminates it
+    const char* start(s);
+    const char* p(start);
+
+    unsigned wordindex(0);
+    while (wordindex<maxword)
+    {
+        if ((*p == sep) || (*p == '\n') || (*p == '\0'))
+        {
+            // [start,p) is now one complete column
+            switch (wordindex)
+            {
+            case 0:
+                chrom=std::string(start,p-start);
+                break;
+            case 1:
+                // the assert relies on parse_int advancing 'start' past the
+                // digits it consumed, i.e. the whole column must be an integer
+                pos=illumina::blt_util::parse_int(start);
+                assert(start==p);
+                break;
+            case 2:
+                // skip this field...
+                break;
+            case 3:
+                ref=std::string(start,p-start);
+                stoupper(ref);
+                break;
+            case 4:
+                // additional parse loop for ',' character:
+            {
+                // each ',' and the column end at p close one ALT allele;
+                // 'start' is moved to the first character after the separator
+                const char* p2(start);
+                while (p2<=p)
+                {
+                    if ((*p2==',') || (p2==p))
+                    {
+                        alt.emplace_back(start,p2-start);
+                        stoupper(alt.back());
+                        start=p2+1;
+                    }
+                    p2++;
+                }
+            }
+            break;
+            default:
+                assert(0);
+                break;
+            }
+            start=p+1;
+            wordindex++;
+        }
+        // end of line: stop even if fewer than maxword columns were seen
+        if ((*p == '\n') || (*p == '\0')) break;
+        ++p;
+    }
+
+    return (wordindex >= maxword);
+}
+
+/// Test whether every ALT allele of this record is normalized with respect
+/// to REF, using only the allele strings (the reference genome is not
+/// consulted).
+///
+/// REF and every ALT allele must be non-empty (checked by assert only).
+///
+/// \return false as soon as one ALT allele violates a rule below, else true
+bool
+vcf_record::
+is_normalized() const
+{
+    // normalized indels are left-aligned, reference-padded, and parsimonious
+    // normalized SNVs are a single differing base
+    // normalized MNVs (and complex alleles) have differing bases at the beginning
+    // and end of the ref and alt alleles.  However, many input VCFs have
+    // reference-padded MNVs, which should not affect Strelka's calling, so for
+    // now, we're allowing variants to violate left-parsimony in MNVs and complex
+    // alleles
+    // see http://genome.sph.umich.edu/wiki/Variant_Normalization
+    unsigned ref_length = ref.size();
+    assert (ref_length != 0);
+
+    for (const auto& alt_allele : alt)
+    {
+        unsigned alt_length = alt_allele.size();
+        assert (alt_length != 0);
+
+        // all normalized variants with the same length ref and alt
+        // must differ at the last ref and alt base.  Any indel that
+        // has more than one base in both the ref and the alt must
+        // also differ at the last base (this should only happen at
+        // complex indels).  This checks for right-padding, i.e. parsimony
+        if ((alt_length > 1 && ref_length > 1) ||
+            alt_length == ref_length)
+        {
+            if ((*alt_allele.rbegin()) == (*ref.rbegin()))
+            {
+                return false;
+            }
+        }
+
+        if (alt_length != ref_length)
+        {
+            // this checks that indels are reference-padded
+            if ( (*alt_allele.begin()) == (*ref.begin()))
+            {
+                // this checks that they're left-shifted: compare the alleles
+                // from their last bases backwards. If the shorter allele is
+                // exhausted without finding a mismatch, it is a suffix of the
+                // longer one and the variant could be shifted further left.
+                //
+                // The indices must move towards 0 (the scan starts at the last
+                // base of each allele); stepping upwards would read past the
+                // end of both strings.
+                for (unsigned i = ref_length - 1, j = alt_length - 1; ; --i, --j)
+                {
+                    if (ref[i] != alt_allele[j])
+                    {
+                        break;
+                    }
+                    else if (i == 0 || j == 0)
+                    {
+                        return false;
+                    }
+                }
+            }
+            // if the first and last bases of alleles with two differing lengths
+            // do not match, the record represents a complex allele, and fulfills
+            // normalization requirements
+        }
+    }
+    return true;
+}
+
+/// write the record as a single VCF-style line: CHROM, POS, REF and the
+/// comma-separated ALT alleles are printed, ID and the four columns after
+/// ALT are written as '.'
+std::ostream& operator<<(std::ostream& os, const vcf_record& vcfr)
+{
+    static const char tab('\t');
+    static const char missing('.');
+
+    os << vcfr.chrom << tab
+       << vcfr.pos << tab
+       << missing << tab
+       << vcfr.ref << tab;
+
+    bool isFirstAlt(true);
+    for (const std::string& altAllele : vcfr.alt)
+    {
+        if (! isFirstAlt) os << ',';
+        os << altAllele;
+        isFirstAlt=false;
+    }
+
+    // four trailing placeholder columns
+    for (unsigned column(0); column<4; ++column)
+    {
+        os << tab << missing;
+    }
+    os << '\n';
+
+    return os;
+}
+
diff --git a/src/c++/lib/htsapi/vcf_record.hh b/src/c++/lib/htsapi/vcf_record.hh
new file mode 100644
index 0000000..82e4c06
--- /dev/null
+++ b/src/c++/lib/htsapi/vcf_record.hh
@@ -0,0 +1,104 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/seq_util.hh"
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+
+/// Minimal in-memory form of one VCF record: only the CHROM, POS, REF and ALT
+/// columns are parsed and stored.
+struct vcf_record
+{
+    vcf_record()
+    {
+        clear();
+    }
+
+    // set record from record string s, return false on error
+    //
+    // the pointer s is retained in 'line' without copying the text
+    bool set(const char* s);
+
+    // reset every field to its empty/zero state
+    void clear()
+    {
+        chrom.clear();
+        pos=0;
+        ref.clear();
+        alt.clear();
+        line=nullptr;
+    }
+
+
+    // true if REF and every ALT allele pass is_valid_seq
+    //
+    // N.B. the current implementation of is_valid does not
+    // allow symbolic ALTs, e.g. representing a deletion using
+    // the <DEL> symbolic allele and the END INFO field.  This
+    // is probably fine, but worth noting
+    bool
+    is_valid() const
+    {
+        if (! is_valid_seq(ref.c_str())) return false;
+        for (const auto& alt_allele : alt)
+        {
+            if (! is_valid_seq(alt_allele.c_str())) return false;
+        }
+        return true;
+    }
+
+    // true for a valid record in which REF (given at least one ALT) or any
+    // ALT allele is longer than one base; note that this test is purely
+    // length-based, so equal-length multi-base alleles also qualify
+    bool
+    is_indel() const
+    {
+        if (! is_valid()) return false;
+        if ((ref.size()>1) && (alt.size()>0)) return true;
+        for (const auto& alt_allele : alt)
+        {
+            if (alt_allele.size()>1) return true;
+        }
+        return false;
+    }
+
+    // true for a valid record in which REF and every ALT allele are exactly
+    // one base long (a record with no ALT alleles also satisfies this)
+    bool
+    is_snv() const
+    {
+        if (! is_valid()) return false;
+        if (1 != ref.size()) return false;
+        for (const auto& alt_allele : alt)
+        {
+            if (1 != alt_allele.size()) return false;
+        }
+        return true;
+    }
+
+    // true if all ALT alleles are normalized relative to REF, see vcf_record.cpp
+    bool is_normalized() const;
+
+    std::string chrom;
+    int pos = 0; // POS column value exactly as parsed by set()
+    std::string ref; // upper-cased by set()
+    std::vector<std::string> alt; // one entry per comma-separated ALT allele, upper-cased by set()
+    const char* line = nullptr; // non-owning pointer to the text last given to set()
+};
+
+
+std::ostream& operator<<(std::ostream& os, const vcf_record& vcfr);
+
diff --git a/src/c++/lib/htsapi/vcf_streamer.cpp b/src/c++/lib/htsapi/vcf_streamer.cpp
new file mode 100644
index 0000000..662d469
--- /dev/null
+++ b/src/c++/lib/htsapi/vcf_streamer.cpp
@@ -0,0 +1,172 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "vcf_streamer.hh"
+
+#include "blt_util/blt_exception.hh"
+#include "blt_util/log.hh"
+#include "blt_util/seq_util.hh"
+
+#include <cassert>
+#include <cstdlib>
+#include <sys/stat.h>
+
+#include <iostream>
+#include <set>
+#include <string>
+
+
+
+// verify that all chromosomes in the bcf/vcf header exist in the
+// bam header
+//
+// nothing is returned: on the first chromosome label which is missing from
+// the BAM header an error is logged and the process exits with EXIT_FAILURE
+//
+// bcf_filename is used for the error message only
+static
+void
+check_bam_bcf_header_compatability(
+    const char* bcf_filename,
+    const bcf_hdr_t* bcfh,
+    const bam_hdr_t& bamh)
+{
+    assert(nullptr != bcfh);
+
+    // build set of chrom labels from BAM:
+    std::set<std::string> bamlabels;
+    for (int32_t i(0); i<bamh.n_targets; ++i)
+    {
+        bamlabels.insert(std::string(bamh.target_name[i]));
+    }
+    int n_labels(0);
+
+    // the returned array of label pointers is released with free() below
+    const char** bcf_labels = bcf_hdr_seqnames(bcfh, &n_labels);
+
+    for (int i(0); i<n_labels; ++i)
+    {
+        if (bamlabels.find(std::string(bcf_labels[i])) != bamlabels.end()) continue;
+        log_os << "ERROR: Chromosome label '" << bcf_labels[i] << "' in BCF/VCF file '" << bcf_filename << "' does not exist in the BAM header\n";
+        exit(EXIT_FAILURE);
+    }
+
+    free(bcf_labels);
+}
+
+
+
+/// open the file via the hts_streamer base, then read the VCF header;
+/// the process exits with EXIT_FAILURE if the header cannot be loaded
+vcf_streamer::
+vcf_streamer(
+    const char* filename,
+    const char* region) :
+    hts_streamer(filename,region),
+    _hdr(nullptr)
+{
+    //
+    // note with the switch to samtools 1.X vcf/bcf still involve predominantly separate
+    // apis -- no bcf support added here, but a shared function has been chosen where possible
+    // ... for_instance hts_open/bcf_hdr_read should work with both vcf and bcf
+    //
+
+    _hdr = bcf_hdr_read(_hfp);
+    if (nullptr != _hdr) return;
+
+    log_os << "ERROR: Failed to load header for VCF file: '" << filename << "'\n";
+    exit(EXIT_FAILURE);
+}
+
+
+
+/// release the VCF header if one was loaded
+vcf_streamer::
+~vcf_streamer()
+{
+    if (nullptr == _hdr) return;
+    bcf_hdr_destroy(_hdr);
+}
+
+
+
+/// Advance to the next usable VCF record.
+///
+/// Header lines, records failing vcf_record::is_valid() and (when
+/// is_indel_only is set) records which are not indels are skipped. A record
+/// which cannot be parsed at all is fatal: an error is logged and the
+/// process exits.
+///
+/// \return true if a record is now available through get_record_ptr(),
+///         false once the stream is exhausted or was never usable
+bool
+vcf_streamer::
+next(
+    const bool is_indel_only)
+{
+    if (_is_stream_end || (nullptr==_hfp) || (nullptr==_titr)) return false;
+
+    while (true)
+    {
+        // any negative return from the iterator is treated as end of stream
+        if (tbx_itr_next(_hfp, _tidx, _titr, &_kstr) < 0)
+        {
+            _is_stream_end=true;
+        }
+        else
+        {
+            _is_stream_end=(nullptr == _kstr.s);
+        }
+        _is_record_set=(! _is_stream_end);
+        if (! _is_record_set) break;
+
+        // filter out header for whole file access case:
+        if (_kstr.s[0] == '#') continue;
+
+        // counts every non-header line read, including records skipped below
+        _record_no++;
+
+        if (! _vcfrec.set(_kstr.s))
+        {
+            log_os << "ERROR: Can't parse vcf record: '" << _kstr.s << "'\n";
+            exit(EXIT_FAILURE);
+        }
+        if (! _vcfrec.is_valid()) continue;
+        if (is_indel_only && (! _vcfrec.is_indel())) continue;
+
+        break; // found expected vcf record type
+    }
+
+    return _is_record_set;
+}
+
+
+
+/// write a short description of the stream (label, and the current record
+/// with its record number if one is set) to os
+void
+vcf_streamer::
+report_state(std::ostream& os) const
+{
+    const vcf_record* const currentRecord(get_record_ptr());
+
+    os << "\tvcf_stream_label: " << name() << "\n";
+
+    if (nullptr == currentRecord)
+    {
+        os << "\tno vcf record currently set\n";
+        return;
+    }
+
+    os << "\tvcf_stream_record_no: " << record_no() << "\n"
+       << "\tvcf_record: " << *currentRecord << "\n";
+}
+
+
+
+/// check that every chromosome named in this VCF's header is also present in
+/// the given BAM header; the check logs an error and exits the process on
+/// the first label which is missing
+void
+vcf_streamer::
+validateBamHeaderChromSync(
+    const bam_hdr_t& header) const
+{
+    check_bam_bcf_header_compatability(name(), _hdr, header);
+}
diff --git a/src/c++/lib/htsapi/vcf_streamer.hh b/src/c++/lib/htsapi/vcf_streamer.hh
new file mode 100644
index 0000000..fc12444
--- /dev/null
+++ b/src/c++/lib/htsapi/vcf_streamer.hh
@@ -0,0 +1,63 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "hts_streamer.hh"
+#include "vcf_record.hh"
+
+
+/// stream vcf_record objects from a VCF file, built on top of hts_streamer
+///
+struct vcf_streamer : public hts_streamer
+{
+    /// both arguments are forwarded to hts_streamer; the VCF header is read
+    /// on construction and the process exits if that fails
+    vcf_streamer(
+        const char* filename,
+        const char* region);
+
+    ~vcf_streamer();
+
+    // advance to next vcf record
+    //
+    // is_indel_only - if set, skip all records except indels
+    //
+    // returns false when no further record is available
+    //
+    bool next(const bool is_indel_only=false);
+
+    // current record, or nullptr if no record is set (before the first
+    // successful next() call or after the end of the stream)
+    const vcf_record*
+    get_record_ptr() const
+    {
+        if (_is_record_set) return &_vcfrec;
+        else                return nullptr;
+    }
+
+    // write stream label and current record (if any) to os
+    void report_state(std::ostream& os) const;
+
+    /// provide a BAM header to validate vcf chromosome names against
+    ///
+    void
+    validateBamHeaderChromSync(
+        const bam_hdr_t& header) const;
+
+private:
+    bcf_hdr_t* _hdr; // VCF header, read in the constructor and destroyed in the destructor
+    vcf_record _vcfrec; // storage for the record exposed by get_record_ptr()
+};
diff --git a/src/c++/lib/htsapi/vcf_util.cpp b/src/c++/lib/htsapi/vcf_util.cpp
new file mode 100644
index 0000000..adadb08
--- /dev/null
+++ b/src/c++/lib/htsapi/vcf_util.cpp
@@ -0,0 +1,165 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "htsapi/vcf_util.hh"
+
+#include "blt_util/blt_exception.hh"
+#include <cassert>
+#include <cctype>
+#include <ctime>
+
+#include <iostream>
+#include <sstream>
+
+
+
+/// write the current local date to os in YYYYMMDD form
+std::ostream&
+vcf_fileDate(std::ostream& os)
+{
+    static const unsigned bufferSize(64);
+
+    const time_t now(time(nullptr));
+    struct tm* const localNow(localtime(&now));
+    assert(nullptr != localNow);
+
+    char dateText[bufferSize];
+    const size_t writtenCount(strftime(dateText,bufferSize,"%Y%m%d",localNow));
+    assert(writtenCount!=0);
+
+    os << dateText;
+    return os;
+}
+
+
+
+/// write one "##FILTER" VCF header line for the given filter id and description
+void
+write_vcf_filter(
+    std::ostream& os,
+    const char* id,
+    const char* desc)
+{
+    os << "##FILTER=<ID=" << id;
+    os << ",Description=\"" << desc << "\">\n";
+}
+
+
+
+/// Parser for VCF genotype strings such as "0/1", "1|2" or "./."
+///
+/// The string is a sequence of alleles (a non-negative integer, or '.' for
+/// unknown) separated by '/' or '|'. The static functions below call each
+/// other, one per token kind: start -> (digit | unknown) -> sep -> ...
+///
+/// In all functions:
+///   gt        - current parse position
+///   gti       - output allele indices, -1 is stored for a '.' allele
+///   is_badend - if true, any unexpected character following an allele ends
+///               the parse successfully instead of being an error
+///
+struct gt_parse_helper
+{
+    // return is_valid_genotype
+    //
+    // gti is cleared first; on a false return it holds the alleles parsed
+    // before the error was found
+    static
+    bool
+    start(const char* gt,
+          std::vector<int>& gti,
+          const bool is_badend)
+    {
+        gti.clear();
+        if (is_digit(*gt)) return digit(gt,gti,is_badend);
+
+        switch (*gt)
+        {
+        case '.' :
+            return unknown(gt,gti,is_badend);
+        default:
+            return false;
+        }
+    }
+
+private:
+
+    // isdigit is only defined for EOF and values representable as unsigned
+    // char, so a plain (possibly negative) char must be converted first
+    static
+    bool
+    is_digit(const char c)
+    {
+        return (0 != isdigit(static_cast<unsigned char>(c)));
+    }
+
+    // gt points at a '.' allele
+    static
+    bool
+    unknown(const char* gt,
+            std::vector<int>& gti,
+            const bool is_badend)
+    {
+        gt++;
+        gti.push_back(-1);
+        switch (*gt)
+        {
+        case '\0' :
+            return true;
+        case '|' :
+        case '/' :
+            return sep(gt,gti,is_badend);
+        default :
+            return is_badend;
+        }
+    }
+
+    // gt points at a '/' or '|' separator, which must be followed by an allele
+    static
+    bool
+    sep(const char* gt,
+        std::vector<int>& gti,
+        const bool is_badend)
+    {
+        gt++;
+        if (is_digit(*gt)) return digit(gt,gti,is_badend);
+        switch (*gt)
+        {
+        case '.' :
+            return unknown(gt,gti,is_badend);
+        default :
+            return false;
+        }
+    }
+
+    // gt points at the first digit of an integer allele
+    static
+    bool
+    digit(const char* gt,
+          std::vector<int>& gti,
+          const bool is_badend)
+    {
+        int val(0);
+        while (is_digit(*gt))
+        {
+            val = val*10 + static_cast<int>(*gt-'0');
+            gt++;
+        }
+        gti.push_back(val);
+
+        switch (*gt)
+        {
+        case '\0' :
+            return true;
+        case '|' :
+        case '/' :
+            return sep(gt,gti,is_badend);
+        default :
+            return is_badend;
+        }
+    }
+};
+
+
+
+/// parse genotype string gt into allele indices gti (-1 for '.' alleles),
+/// throwing blt_exception if the string is not a valid genotype
+void
+parse_gt(const char* gt,
+         std::vector<int>& gti,
+         const bool is_allow_bad_end_char)
+{
+    const bool isParsed(gt_parse_helper::start(gt,gti,is_allow_bad_end_char));
+    if (isParsed) return;
+
+    std::ostringstream oss;
+    oss << "ERROR: can't parse genotype string: '" << gt << "'\n";
+    throw blt_exception(oss.str().c_str());
+}
+
diff --git a/src/c++/lib/htsapi/vcf_util.hh b/src/c++/lib/htsapi/vcf_util.hh
new file mode 100644
index 0000000..d4aa966
--- /dev/null
+++ b/src/c++/lib/htsapi/vcf_util.hh
@@ -0,0 +1,120 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+/// random vcf utilities
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <cstring>
+#include <iosfwd>
+#include <vector>
+
+
+/// zero-based column indices of a VCF record line, in file order
+///
+namespace VCFID
+{
+enum index_t
+{
+    CHROM,
+    POS,
+    ID,
+    REF,
+    ALT,
+    QUAL,
+    FILT,
+    INFO,
+    FORMAT,
+    SAMPLE, // first sample column
+    SIZE    // number of indices defined above
+};
+}
+
+
+
+/// VCF column header line for the eight fixed columns (no FORMAT or sample
+/// columns, no trailing newline)
+inline
+const char*
+vcf_col_label()
+{
+    static const char* const header("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO");
+    return header;
+}
+
+
+std::ostream&
+vcf_fileDate(std::ostream& os);
+
+
+void
+write_vcf_filter(
+    std::ostream& os,
+    const char* id,
+    const char* desc);
+
+
+// look for 'key' in vcf FORMAT field, provide index of key or return
+// false
+//
+// format - NUL-terminated, ':'-separated list of keys, e.g. "GT:GQ:DP"
+// key    - key to search for; it must match a whole entry of the list, so
+//          that e.g. "GQ" is not reported as found at the position of "GQX"
+// index  - on a true return, the zero-based position of key in the list;
+//          on a false return, the number of entries which were examined
+//
+inline
+bool
+get_format_key_index(const char* format,
+                     const char* key,
+                     unsigned& index)
+{
+    const size_t keylen(strlen(key));
+
+    index=0;
+    do
+    {
+        // after the first entry, format points at the ':' found by strchr below
+        if (index) format++;
+        if (0==strncmp(format,key,keylen))
+        {
+            // a prefix match is not enough, the entry has to end right after
+            // the key (format[keylen] is at most the terminating NUL here)
+            const char next(format[keylen]);
+            if ((next==':') || (next=='\0')) return true;
+        }
+        index++;
+    }
+    while (NULL != (format=strchr(format,':')));
+    return false;
+}
+
+
+
+// return pointer to the start of the value for 'key' inside the sample
+// column of a VCF record, or NULL if the key is not in the FORMAT column or
+// the sample column has too few ':'-separated entries
+//
+// word holds one pointer per VCF column, indexed by VCFID; nothing is copied,
+// so the returned value runs on to the next ':' or the end of the sample string
+//
+inline
+const char*
+get_format_string_nocopy(const char* const* word,
+                         const char* key)
+{
+    unsigned remaining(0);
+    if (! get_format_key_index(word[VCFID::FORMAT],key,remaining)) return NULL;
+
+    // step over one ':' in the sample string per entry preceding the key
+    const char* cursor(word[VCFID::SAMPLE]);
+    while (remaining != 0)
+    {
+        const char current(*cursor);
+        if (current == '\0') return NULL;
+        if (current == ':') --remaining;
+        ++cursor;
+    }
+    return cursor;
+}
+
+
+
+// returns -1 for '.' alleles
+void
+parse_gt(const char* gt,
+         std::vector<int>& gti,
+         const bool is_allow_bad_end_char=false);
diff --git a/src/c++/lib/manta/BamRegionProcessor.hh b/src/c++/lib/manta/BamRegionProcessor.hh
new file mode 100644
index 0000000..71e77dc
--- /dev/null
+++ b/src/c++/lib/manta/BamRegionProcessor.hh
@@ -0,0 +1,58 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "htsapi/bam_record.hh"
+#include "svgraph/GenomeInterval.hh"
+
+
+/// This enables specification of different methods which
+/// must traverse a range of reads in a bam file. By abstracting
+/// multiple methods to this interface, we can accomplish multiple
+/// tasks over a single pass of the BAM records while maintaining
+/// isolation of methods
+///
+struct BamRegionProcessor
+{
+    virtual
+    ~BamRegionProcessor() {}
+
+    /// provide the index of the next bam file, must be called before switching files/samples
+    ///
+    /// for each bam index, return the requested interval for this operation,
+    /// operations with closely related intervals will be combined
+    /// and the union of intervals will be processed
+    virtual
+    const GenomeInterval&
+    nextBamIndex(
+        const unsigned bamIndex) = 0;
+
+    /// provide the next bam record
+    ///
+    /// (the declaration below is commented out, so this method is not
+    /// currently part of the interface)
+    /*virtual
+    void
+    processRecord(
+        const bam_record& bamRead) = 0;
+    */
+};
diff --git a/src/c++/lib/manta/CMakeLists.txt b/src/c++/lib/manta/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/manta/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/manta/ChromDepthFilterUtil.cpp b/src/c++/lib/manta/ChromDepthFilterUtil.cpp
new file mode 100644
index 0000000..ec40502
--- /dev/null
+++ b/src/c++/lib/manta/ChromDepthFilterUtil.cpp
@@ -0,0 +1,64 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "common/Exceptions.hh"
+#include "manta/ChromDepthFilterUtil.hh"
+
+#include <sstream>
+
+
+
+/// build the per-chromosome max depth table
+///
+/// an empty chromDepthFile disables the filter and leaves the table empty;
+/// otherwise every chromosome in header must be present in the depth file,
+/// and its depth multiplied by maxDepthFactor becomes the cutoff
+ChromDepthFilterUtil::
+ChromDepthFilterUtil(
+    const std::string& chromDepthFile,
+    const double maxDepthFactor,
+    const bam_header_info& header) :
+    _isMaxDepthFilter(! chromDepthFile.empty())
+{
+    using namespace illumina::common;
+
+    // no depth file given, nothing to load:
+    if (! _isMaxDepthFilter) return;
+
+    cdmap_t depthByLabel;
+    parse_chrom_depth(chromDepthFile,depthByLabel);
+
+    // walk the chromosomes in header order, so that the position of each
+    // cutoff in the vector matches the chromosome's position in the header:
+    //
+    for (const bam_header_info::chrom_info& chromInfo : header.chrom_data)
+    {
+        const cdmap_t::const_iterator depthIter(depthByLabel.find(chromInfo.label));
+        if (depthIter == depthByLabel.end())
+        {
+            std::ostringstream oss;
+            oss << "ERROR: Can't find chromosome: '" << chromInfo.label
+                << "' in chrom depth file: " << chromDepthFile << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+
+        _maxDepthFilter.push_back(depthIter->second*maxDepthFactor);
+        assert(_maxDepthFilter.back()>=0.);
+    }
+}
+
diff --git a/src/c++/lib/manta/ChromDepthFilterUtil.hh b/src/c++/lib/manta/ChromDepthFilterUtil.hh
new file mode 100644
index 0000000..27bfdf4
--- /dev/null
+++ b/src/c++/lib/manta/ChromDepthFilterUtil.hh
@@ -0,0 +1,63 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/chrom_depth_map.hh"
+#include "htsapi/bam_header_info.hh"
+
+#include <cassert>
+
+#include <vector>
+
+
+/// hold information about chrom depth cutoffs
+///
+/// preprocess the chrom depth file so that the filter value can be
+/// efficiently looked up by bam tid
+///
+struct ChromDepthFilterUtil
+{
+    /// chromDepthFile - chrom depth file to read, empty to disable the filter
+    /// maxDepthFactor - multiplier applied to each chromosome's depth to get its cutoff
+    /// header - provides the chromosomes to look up, one cutoff is stored per entry
+    ChromDepthFilterUtil(
+        const std::string& chromDepthFile,
+        const double maxDepthFactor,
+        const bam_header_info& header);
+
+    /// true if a chrom depth file was provided
+    bool
+    isMaxDepthFilter() const
+    {
+        return _isMaxDepthFilter;
+    }
+
+    /// depth cutoff for the chromosome with index tid
+    ///
+    /// the table is only filled when isMaxDepthFilter() is true, so this
+    /// must not be called when the filter is disabled
+    double
+    maxDepth(const int32_t tid) const
+    {
+        assert((tid >= 0) && (tid < static_cast<int32_t>(_maxDepthFilter.size())));
+        return _maxDepthFilter[tid];
+    }
+
+private:
+    bool _isMaxDepthFilter;
+    std::vector<double> _maxDepthFilter; // cutoffs in header chromosome order
+};
diff --git a/src/c++/lib/manta/EventInfo.hh b/src/c++/lib/manta/EventInfo.hh
new file mode 100644
index 0000000..33b671d
--- /dev/null
+++ b/src/c++/lib/manta/EventInfo.hh
@@ -0,0 +1,44 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <string>
+
+
+/// data related to a multi-junction event
+///
+/// a default-constructed object has a junction count of one and no label
+///
+struct EventInfo
+{
+    EventInfo() {}
+
+    /// true once a label has been assigned
+    bool
+    isEvent() const
+    {
+        const bool isUnlabeled(label.empty());
+        return (! isUnlabeled);
+    }
+
+    unsigned junctionCount = 1;
+    std::string label;
+};
diff --git a/src/c++/lib/manta/JunctionIdGenerator.cpp b/src/c++/lib/manta/JunctionIdGenerator.cpp
new file mode 100644
index 0000000..d167255
--- /dev/null
+++ b/src/c++/lib/manta/JunctionIdGenerator.cpp
@@ -0,0 +1,53 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/JunctionIdGenerator.hh"
+
+
+
+/// fill svId with the extended SV type of sv and the matching id string(s)
+///
+/// the id is formatted from the type label, the edge's locus/node indices
+/// and the candidate/assembly indices of sv; for translocation types the
+/// local and mate ids get the suffixes ":0" and ":1", otherwise the mate id
+/// is left empty
+void
+JunctionIdGenerator::
+getId(
+    const EdgeInfo& edge,
+    const SVCandidate& sv,
+    const bool isRNA,
+    SVId& svId)
+{
+    using namespace EXTENDED_SV_TYPE;
+
+    svId.svType=(getExtendedSVType(sv, isRNA));
+
+    const std::string baseId(
+        str(_SVIdFormatter % label(svId.svType) % edge.locusIndex % edge.nodeIndex1 % edge.nodeIndex2
+            % sv.candidateIndex %  sv.assemblyAlignIndex % sv.assemblySegmentIndex ));
+
+    if (! isSVTransloc(svId.svType))
+    {
+        svId.localId = baseId;
+        svId.mateId.clear();
+        return;
+    }
+
+    svId.localId = baseId + ":0";
+    svId.mateId = baseId + ":1";
+}
diff --git a/src/c++/lib/manta/JunctionIdGenerator.hh b/src/c++/lib/manta/JunctionIdGenerator.hh
new file mode 100644
index 0000000..dbbd796
--- /dev/null
+++ b/src/c++/lib/manta/JunctionIdGenerator.hh
@@ -0,0 +1,70 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/EdgeInfo.hh"
+#include "manta/SVCandidateUtil.hh"
+#include "boost/format.hpp"
+
+#include <string>
+
+
+/// A pair of ids for both ends of a single SV junction
+///
+/// the mateId will only be defined for translocations, and is empty otherwise
+///
+struct SVId
+{
+    const char*
+    getLabel() const // label string for the stored svType
+    {
+        return EXTENDED_SV_TYPE::label(svType);
+    }
+
+    EXTENDED_SV_TYPE::index_t svType = EXTENDED_SV_TYPE::UNKNOWN; ///< extended SV type, UNKNOWN until set
+    std::string localId; ///< id of this end of the junction
+    std::string mateId; ///< id of the mate end; left empty when there is no mate
+};
+
+
+/// create IDs for each variant that are guaranteed to be unique for a single
+/// manta run
+///
+struct JunctionIdGenerator
+{
+    JunctionIdGenerator() :
+        _SVIdFormatter("Manta%s:%i:%i:%i:%i:%i:%i") // one string field followed by six integer fields
+    {}
+    /// fill svId (svType, localId, mateId) for candidate sv, using the indices stored in edge and sv
+    void
+    getId(
+        const EdgeInfo& edge,
+        const SVCandidate& sv,
+        const bool isRNA,
+        SVId& svId);
+
+private:
+    boost::format _SVIdFormatter; ///< id template, initialized once in the constructor
+};
diff --git a/src/c++/lib/manta/MultiJunctionUtil.cpp b/src/c++/lib/manta/MultiJunctionUtil.cpp
new file mode 100644
index 0000000..743dc36
--- /dev/null
+++ b/src/c++/lib/manta/MultiJunctionUtil.cpp
@@ -0,0 +1,415 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "MultiJunctionUtil.hh"
+
+#include "blt_util/log.hh"
+#include "manta/SVCandidateUtil.hh"
+
+#include <limits>
+
+//#define DEBUG_SVDATA
+
+
+/// return true for candidates that should be filtered out, based on
+/// information available in a full junction set
+///
+static
+bool
+isFilterMultiJunctionCandidate(
+    const unsigned minCandidateSpanningCount,
+    const SVMultiJunctionCandidate& mjSV)
+{
+    // one breakend out of the junction set must have the minimum level of evidence:
+    bool isCountFilter(true); // stays true until a spanning junction reaches minCandidateSpanningCount at bp1
+    bool isSingleFilter(true); // stays true until a spanning junction is found without isSingleJunctionFilter
+    for (const SVCandidate& sv : mjSV.junction)
+    {
+        if (isSpanningSV(sv))
+        {
+            if (sv.bp1.getSpanningCount() >= minCandidateSpanningCount)
+            {
+                isCountFilter = false;
+            }
+
+            if (! sv.isSingleJunctionFilter)
+            {
+                isSingleFilter = false;
+            }
+        }
+
+#ifdef DEBUG_SVDATA
+        using namespace SVEvidenceType;
+        log_os << __FUNCTION__
+               << " spanning=" << sv.bp1.getSpanningCount()
+               << " pair=" << sv.bp1.lowresEvidence.getVal(PAIR)
+               << " cigar=" << sv.bp1.lowresEvidence.getVal(CIGAR)
+               << " split=" << sv.bp1.lowresEvidence.getVal(SPLIT_ALIGN)
+               << "\n"
+               << " isCountFilter=" << isCountFilter
+               << " isSingleFilter=" << isSingleFilter
+               << "\n";
+#endif
+
+    }
+    return (isCountFilter || isSingleFilter); // filtered if either requirement was never met by any junction
+}
+
+
+
+static
+unsigned
+getIntervalDist(
+    const GenomeInterval& intervalA,
+    const GenomeInterval& intervalB)
+{
+    // intervals with different tid values are reported as maximally distant
+    if (intervalA.tid != intervalB.tid) return std::numeric_limits<unsigned>::max();
+    // otherwise: absolute distance between the two interval centers
+    const auto centerOffset(intervalA.range.center_pos() - intervalB.range.center_pos());
+    return std::abs(centerOffset);
+}
+
+
+
+/// true when the distance reported by getIntervalDist for the two intervals is below minFilterDist
+static
+bool
+isIntervalPairGroupCandidate(
+    const GenomeInterval& intervalA,
+    const GenomeInterval& intervalB,
+    const unsigned minFilterDist)
+{
+    return (minFilterDist > getIntervalDist(intervalA,intervalB));
+}
+
+
+
+/// return:
+/// max(dist(A1,B1),dist(A2,B2)) if is11 is true
+/// or
+/// max(dist(A1,B2),dist(A2,B1)) if is11 is false
+///
+static
+unsigned
+getMaxIntervalDistance(
+    const SVCandidate& svA,
+    const SVCandidate& svB,
+    const bool is11)
+{
+    // is11 selects which breakend of svB is compared with each breakend of svA:
+    //   true  -> (A1,B1) and (A2,B2)
+    //   false -> (A1,B2) and (A2,B1)
+
+    const auto& partnerOfA1(is11 ? svB.bp1.interval : svB.bp2.interval);
+    const auto& partnerOfA2(is11 ? svB.bp2.interval : svB.bp1.interval);
+
+    const unsigned distFromA1(getIntervalDist(svA.bp1.interval, partnerOfA1));
+    const unsigned distFromA2(getIntervalDist(svA.bp2.interval, partnerOfA2));
+
+    // report the worse (larger) of the two distances
+    return std::max(distFromA1, distFromA2);
+}
+
+
+
+/// return  1 if dist(A1,B1) and dist(A2,B2) are both less than dist(A1,B2) and dist(A2,B1)
+/// return -1 if dist(A1,B2) and dist(A2,B1) are both less than dist(A1,B1) and dist(A2,B2)
+/// return 0 for all other cases
+static
+int
+getJunctionBpAlignment(
+    const SVCandidate& svA,
+    const SVCandidate& svB)
+{
+    const unsigned same1(getIntervalDist(svA.bp1.interval,svB.bp1.interval));
+    const unsigned cross1(getIntervalDist(svA.bp1.interval,svB.bp2.interval));
+    const unsigned cross2(getIntervalDist(svA.bp2.interval,svB.bp1.interval));
+    const unsigned same2(getIntervalDist(svA.bp2.interval,svB.bp2.interval));
+
+    if (std::max(same1,same2) < std::min(cross1,cross2)) return  1; // both same-index distances beat both cross distances
+    if (std::max(cross1,cross2) < std::min(same1,same2)) return -1; // both cross distances beat both same-index distances
+    return 0;
+}
+
+
+
+/// are two breakend pairs candidates for a multi-junction analysis?:
+///
+static
+bool
+isBreakendPairGroupCandidate(
+    const SVBreakend& bpA,
+    const SVBreakend& bpB,
+    const unsigned groupRange = 1000)
+{
+    // requires opposite breakend orientation first, then interval centers closer than groupRange
+    return (isOppositeOrientation(bpA.state, bpB.state) &&
+            isIntervalPairGroupCandidate(bpA.interval, bpB.interval, groupRange));
+}
+
+
+
+/// test to see if a breakend can participate in a multi-junction analysis:
+///
+/// right now our only criteria is to exclude small non-inversions, just because
+/// such pairs can spontaneously occur at relatively high rates:
+static
+bool
+isSVMJExcluded(
+    const SVCandidate& sv)
+{
+    static const unsigned minInnieSVSize(100000);
+
+    // only INDEL and TANDUP candidates can be excluded...
+    const SV_TYPE::index_t svType(getSVType(sv));
+    const bool isInnieType((svType == SV_TYPE::INDEL) || (svType == SV_TYPE::TANDUP));
+
+    // ...and only when their two breakend intervals are closer than minInnieSVSize
+    return (isInnieType &&
+            (getIntervalDist(sv.bp1.interval, sv.bp2.interval) < minInnieSVSize));
+}
+
+
+
+namespace MJ_INTERACTION
+{
+enum index_t
+{
+    NONE, // no partner assigned
+    SAME, // partnered with bp1 matched to bp1 and bp2 matched to bp2
+    FLIP, // partnered with bp1 matched to bp2 and bp2 matched to bp1
+    CONFLICT // competing partner candidates were found
+};
+
+struct MJState
+{
+    void
+    clear()
+    {
+        type = NONE;
+        partnerId = 0;
+        maxPartnerDistance = 0;
+    }
+
+    index_t type = NONE; // interaction type with the current partner
+    unsigned partnerId = 0; // index of the partner within the same state vector
+    unsigned maxPartnerDistance = 0; // distance recorded for the current pairing
+};
+}
+
+
+/// record spanIndex2 as the partner of spanIndex1 (one direction only; the entry at spanIndex2 is untouched)
+static
+void
+setPartner(
+    std::vector<MJ_INTERACTION::MJState>& spanPartners,
+    const MJ_INTERACTION::index_t newType,
+    const unsigned maxPartnerDistance,
+    const unsigned spanIndex1,
+    const unsigned spanIndex2)
+{
+    spanPartners[spanIndex1].type = newType;
+    spanPartners[spanIndex1].partnerId = spanIndex2;
+    spanPartners[spanIndex1].maxPartnerDistance = maxPartnerDistance;
+}
+
+
+
+// spanIndex1 is the new partner, spanIndex2 is the previously connected partner;
+// the new pairing replaces the old one only if its distance is strictly smaller
+static
+void
+resetPartners(
+    std::vector<MJ_INTERACTION::MJState>& spanPartners,
+    const MJ_INTERACTION::index_t newType,
+    const unsigned maxPartnerDistance,
+    const unsigned spanIndex1,
+    const unsigned spanIndex2)
+{
+    using namespace MJ_INTERACTION;
+
+    if (spanPartners[spanIndex2].maxPartnerDistance <= maxPartnerDistance)
+    {
+        // don't reset -- original pairing was better (ties keep the original):
+        spanPartners[spanIndex1].type = NONE;
+        return;
+    }
+
+    // do reset, the new pairing is better:
+
+    // undo previous partner (2): its old partner is cleared and then marked CONFLICT
+    const unsigned spanIndexC(spanPartners[spanIndex2].partnerId);
+    assert(spanIndexC != spanIndex1);
+    spanPartners[spanIndexC].clear();
+    spanPartners[spanIndexC].type = CONFLICT;
+
+    // now initialize 1 and "reprogram" 2 so that both entries point at each other:
+    setPartner(spanPartners,newType,maxPartnerDistance,spanIndex1,spanIndex2);
+    setPartner(spanPartners,newType,maxPartnerDistance,spanIndex2,spanIndex1);
+}
+
+
+
+void
+findMultiJunctionCandidates(
+    const std::vector<SVCandidate>& svs,
+    const unsigned minCandidateSpanningCount,
+    unsigned& mjComplexCount,
+    unsigned& mjSpanningFilterCount,
+    std::vector<SVMultiJunctionCandidate>& mjSVs)
+{
+    mjSVs.clear();
+    // split the input: candidates flagged by isComplexSV go to complexSVs, all others to spanningSVs
+    std::vector<SVCandidate> complexSVs;
+    std::vector<SVCandidate> spanningSVs;
+
+    for (const SVCandidate& candidateSV : svs)
+    {
+        const bool isComplex(isComplexSV(candidateSV));
+
+        if (isComplex)
+        {
+            complexSVs.push_back(candidateSV);
+        }
+        else
+        {
+            spanningSVs.push_back(candidateSV);
+        }
+    }
+
+    mjComplexCount = complexSVs.size(); // overwrites whatever value the caller passed in
+
+    /// do a brute-force intersection test to see if we can associate candidates:
+    ///
+    /// intersection rules : breakend region center must be within distance N
+    /// intersecting breakend orientation makes it possible for these to be a single event -- ie. pointing away or towards each other
+    /// full set of intersections must complete a loop, this is an intentionally conservative requirement to make sure we start into
+    ///    this without getting involved in the really difficult stuff
+    ///
+    /// just for the starting version, the number of SVCandidates which can intersect is limited to 2
+    ///
+
+    const unsigned spanCount(spanningSVs.size());
+    std::vector<MJ_INTERACTION::MJState> spanPartners(spanCount); // one pairing state per entry of spanningSVs
+    {
+        using namespace MJ_INTERACTION;
+
+        for (unsigned spanIndexA(0); (spanIndexA+1)<spanCount; ++spanIndexA)
+        {
+            const SVCandidate& spanA(spanningSVs[spanIndexA]);
+            if (isSVMJExcluded(spanA)) continue;
+
+            for (unsigned spanIndexB(spanIndexA+1); spanIndexB<spanCount; ++spanIndexB)
+            {
+                const SVCandidate& spanB(spanningSVs[spanIndexB]);
+                if (isSVMJExcluded(spanB)) continue;
+
+                const bool isSameBpGroup(isBreakendPairGroupCandidate(spanA.bp1,spanB.bp1) && isBreakendPairGroupCandidate(spanA.bp2,spanB.bp2));
+                const bool isFlipBpGroup(isBreakendPairGroupCandidate(spanA.bp1,spanB.bp2) && isBreakendPairGroupCandidate(spanA.bp2,spanB.bp1));
+
+                bool isGroup(false);
+                if (isSameBpGroup || isFlipBpGroup)
+                {
+                    /// check that this isn't a flipped association as breakpoints get near each other,
+                    /// if it is treat the association as independent junctions:
+                    if (isSameBpGroup)
+                    {
+                        isGroup = (getJunctionBpAlignment(spanA, spanB) > 0);
+                    }
+                    else
+                    {
+                        isGroup = (getJunctionBpAlignment(spanA, spanB) < 0);
+                    }
+                }
+
+                if (!isGroup) continue;
+
+                const index_t newType( isSameBpGroup ? SAME : FLIP );
+                const unsigned maxPartnerDistance(getMaxIntervalDistance(spanA, spanB, isSameBpGroup));
+
+                if ((spanPartners[spanIndexA].type == NONE) && (spanPartners[spanIndexB].type == NONE))
+                {
+                    setPartner(spanPartners,newType,maxPartnerDistance,spanIndexA,spanIndexB);
+                    setPartner(spanPartners,newType,maxPartnerDistance,spanIndexB,spanIndexA);
+                }
+                else if (spanPartners[spanIndexA].type == NONE)
+                {
+                    resetPartners(spanPartners,newType,maxPartnerDistance,spanIndexA,spanIndexB);
+                }
+                else if (spanPartners[spanIndexB].type == NONE)
+                {
+                    resetPartners(spanPartners,newType,maxPartnerDistance,spanIndexB,spanIndexA);
+                }
+                else
+                {
+                    /// multiple candidates, keep the pair that's closer, and don't tolerate more than one repeat
+                    spanPartners[spanIndexA].type = CONFLICT;
+                    spanPartners[spanIndexB].type = CONFLICT;
+                }
+            }
+        }
+    }
+
+    // spanning SVs are checked for groupings, and filtered
+    //
+    // spanning SVs should come first in order so that overlap checks can
+    // be used to reduce work in the assembler
+    for (unsigned spanIndex(0); spanIndex<spanCount; ++spanIndex)
+    {
+        SVMultiJunctionCandidate mj;
+        mj.junction.push_back(spanningSVs[spanIndex]);
+
+        using namespace MJ_INTERACTION;
+        if ((spanPartners[spanIndex].type == SAME) ||
+            (spanPartners[spanIndex].type == FLIP))
+        {
+            const unsigned partnerId(spanPartners[spanIndex].partnerId);
+            assert(partnerId < spanCount);
+
+            // only include the connected pair once:
+            if (partnerId < spanIndex) continue;
+
+            mj.junction.push_back(spanningSVs[partnerId]);
+        }
+
+        if (isFilterMultiJunctionCandidate(minCandidateSpanningCount, mj))
+        {
+            mjSpanningFilterCount++; // incremented only: the caller's incoming value is kept as the starting count
+            continue;
+        }
+
+        mjSVs.push_back(mj);
+    }
+
+    // complex SVs are translated directly into single partner candidates:
+    for (const SVCandidate& candidateSV : complexSVs)
+    {
+        SVMultiJunctionCandidate mj;
+        mj.junction.push_back(candidateSV);
+        mjSVs.push_back(mj);
+    }
+
+
+}
diff --git a/src/c++/lib/manta/MultiJunctionUtil.hh b/src/c++/lib/manta/MultiJunctionUtil.hh
new file mode 100644
index 0000000..a8426f5
--- /dev/null
+++ b/src/c++/lib/manta/MultiJunctionUtil.hh
@@ -0,0 +1,39 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVCandidate.hh"
+#include "manta/SVMultiJunctionCandidate.hh"
+
+#include <vector>
+
+
+void
+findMultiJunctionCandidates(
+    const std::vector<SVCandidate>& svs,
+    const unsigned minCandidateSpanningCount,
+    unsigned& mjComplexCount,
+    unsigned& mjSpanningFilterCount,
+    std::vector<SVMultiJunctionCandidate>& mjSVs);
diff --git a/src/c++/lib/manta/ReadChromDepthUtil.cpp b/src/c++/lib/manta/ReadChromDepthUtil.cpp
new file mode 100644
index 0000000..7af49ac
--- /dev/null
+++ b/src/c++/lib/manta/ReadChromDepthUtil.cpp
@@ -0,0 +1,503 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+
+#include "manta/ReadChromDepthUtil.hh"
+
+#include "blt_util/log.hh"
+#include "blt_util/MedianDepthTracker.hh"
+#include "blt_util/depth_buffer.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/bam_header_info.hh"
+#include "htsapi/bam_streamer.hh"
+
+
+#include <iostream>
+#include <sstream>
+
+
+//#define DEBUG_DPS
+
+
+/// dynamically track median read depth
+///
+/// assume all reads align perfectly in place
+///
+/// This method removes zero depth before computing the median
+///
+struct DepthTracker
+{
+    void
+    setNewRegion()
+    {
+        if (! _isRegionInit) return; // no read was added since the last reset, nothing to flush
+
+        flushPos(_maxPos);
+        _maxPos = 0;
+        _isRegionInit = false;
+        _depth.clear();
+    }
+
+    void
+    addRead(
+        const bam_record& bamRead)
+    {
+        const pos_t pos(bamRead.pos()-1); // presumably converts a 1-based position to 0-based -- TODO confirm
+        const unsigned rsize(bamRead.read_size());
+        if (! _isRegionInit)
+        {
+            _maxPos=pos;
+            _isRegionInit=true;
+        }
+
+        for (; _maxPos<pos; ++_maxPos) flushPos(_maxPos); // hand every position before this read's start to the median tracker
+        _depth.inc(pos,rsize);
+        _count++;
+    }
+
+    double
+    getDepth() const
+    {
+        return _mtrack.getMedian(); // median over the depth values flushed so far
+    }
+
+    uint64_t
+    getReadCount() const
+    {
+        return _count; // number of addRead() calls
+    }
+
+private:
+
+    // flush position from depth tracker: its depth becomes one median observation and the buffer entry is cleared
+    void
+    flushPos(
+        const pos_t pos)
+    {
+        const unsigned depth(_depth.val(pos));
+        _mtrack.addObs(depth);
+        _depth.clear_pos(pos);
+    }
+
+    depth_buffer_compressible _depth = depth_buffer_compressible(16); ///< track depth for the purpose of filtering high-depth regions
+    MedianDepthTracker _mtrack;
+
+    bool _isRegionInit = false; // true once a read has been added to the current region
+    pos_t _maxPos = 0; // next position to be flushed
+
+    uint64_t _count = 0;
+};
+
+
+
+/// dynamically track average read depth
+///
+/// we don't need a really slick estimation here because we're tracking depth over large regions,
+/// all we need is:
+/// 1) how much read length did we observe? (alignment doesn't matter)
+/// 2) over what range of positions?
+///
+/// Note this method is designed to REMOVE large empty regions from the average
+struct MeanDepthTracker
+{
+    void
+    setNewRegion()
+    {
+        if (! _isRegionInit) return;
+        _priorRegionLength += currentRegionLength(); // bank the length of the region being closed
+        _minPos = 0;
+        _maxPos = 0;
+        _endPos = 0;
+        _isRegionInit = false;
+    }
+
+    void
+    addRead(
+        const bam_record& bamRead)
+    {
+        if (_isRegionInit)
+        {
+            if (bamRead.pos() > _endPos + 1000) // gap of more than 1000 past the tracked read end: close the region
+            {
+                _maxPos=_endPos;
+                setNewRegion();
+            }
+        }
+
+        if (! _isRegionInit)
+        {
+            _minPos=bamRead.pos();
+            _maxPos=bamRead.pos();
+            _endPos=bamRead.pos() + bamRead.read_size();
+            _isRegionInit=true;
+        }
+        else
+        {
+            if (bamRead.pos() > _maxPos) // only a read starting further right moves the region end
+            {
+                _maxPos = bamRead.pos();
+                _endPos=bamRead.pos() + bamRead.read_size();
+            }
+        }
+
+        _count++;
+        _totalReadLength += bamRead.read_size();
+    }
+
+    double
+    getMeanDepth() const
+    {
+        return (_totalReadLength / (_priorRegionLength + currentRegionLength())); // total read length over total region length
+    }
+
+    uint64_t
+    getReadCount() const
+    {
+        return _count;
+    }
+
+private:
+
+    double
+    currentRegionLength() const
+    {
+        return (1 + _maxPos - _minPos); // evaluates to 1 when no region is open (both positions are 0)
+    }
+
+
+    bool _isRegionInit = false;
+    int32_t _minPos = 0; // start position of the first read in the current region
+    int32_t _maxPos = 0; // largest read start position seen in the current region
+    int32_t _endPos = 0; // start + read size of the read that set _maxPos
+    double _priorRegionLength = 0; // summed length of all regions closed so far
+
+    uint64_t _count = 0;
+    // throw this into a double so we don't worry about overflow:
+    double _totalReadLength = 0;
+};
+
+
+
+/// all data required to build ChromDepth during estimation from the bam file
+///
+struct ChromDepthTracker
+{
+    explicit
+    ChromDepthTracker() :
+        _isFinalized(false),
+        _isChecked(false),
+        _isDepthConverged(false),
+        _oldDepth(-1) // negative means no previous depth has been recorded yet
+    {}
+
+    void
+    setNewRegion()
+    {
+        assert(! _isFinalized);
+        _mdTracker.setNewRegion();
+    }
+
+    void
+    addRead(const bam_record& bamRead)
+    {
+        assert(! _isFinalized);
+        _mdTracker.addRead(bamRead);
+    }
+
+    unsigned
+    depthObservations() const
+    {
+        return _mdTracker.getReadCount(); // note: narrows the tracker's uint64_t count to unsigned
+    }
+
+    bool
+    isDepthCountCheck()
+    {
+        static const unsigned statsCheckCnt(1000000);
+        const bool isCheck((depthObservations() % statsCheckCnt) == 0); // also true when the count is zero
+        if (isCheck) _isChecked=true;
+        return isCheck;
+    }
+
+    bool
+    isChecked() const
+    {
+        return (_isChecked || isDepthConverged());
+    }
+
+    void
+    clearChecked()
+    {
+        _isChecked=false;
+    }
+
+    bool
+    isDepthConverged() const
+    {
+        return _isDepthConverged;
+    }
+
+    void
+    updateDepthConvergenceTest()
+    {
+        // check convergence: compare the current depth with the one stored by the previous call
+        const double depth(_mdTracker.getDepth());
+        if (_oldDepth >= 0)
+        {
+            _isDepthConverged=isDepthMatch(_oldDepth, depth);
+        }
+#ifdef DEBUG_DPS
+        log_os << "Test convergence. Old: " << _oldDepth << " New: " <<  _mdTracker.getDepth() << " Pass: " << _isDepthConverged << "\n";
+        log_os << "Test count. New: " <<  _mdTracker.getReadCount() << "\n";
+#endif
+        _oldDepth = depth;
+    }
+
+    double
+    getDepth() const
+    {
+        assert(_isFinalized);
+        return _mdTracker.getDepth();
+    }
+
+    void
+    finalize(
+        const bool isCompleteChrom = true)
+    {
+        if (_isFinalized) return;
+
+        // run one last convergence test if depth has not converged yet:
+        if (! isDepthConverged())
+        {
+            if (! isDepthCountCheck())
+            {
+                updateDepthConvergenceTest();
+            }
+
+            if (! (isCompleteChrom || isDepthConverged())) // warn only when the chromosome was not read completely
+            {
+                log_os << "WARNING: chrom mean depth did not converge\n";
+            }
+        }
+
+        _isFinalized=true;
+    }
+
+private:
+
+    bool
+    isDepthMatch(
+        const double& d1,
+        const double& d2)
+    {
+        static const float dPrecision(0.05f); // absolute tolerance between two successive depth values
+
+        return (std::abs(d1 - d2) < dPrecision);
+    }
+
+    bool _isFinalized;
+
+    bool _isChecked;
+    bool _isDepthConverged;
+
+    double _oldDepth; // previous depth is stored to determine convergence
+    DepthTracker _mdTracker;
+};
+
+
+
+/// get the start positions of chromosome segments
+/// ensure that all segments are no longer than segmentSize
+///
+/// all are zero-indexed
+static
+void
+getChromSegments(
+    const unsigned chromSize,
+    const unsigned segmentSize,
+    std::vector<unsigned>& startPos)
+{
+    assert(chromSize>0);
+    assert(segmentSize>0);
+
+    // smallest segment count for which no segment exceeds segmentSize:
+    const unsigned segmentCount(1+((chromSize-1)/segmentSize));
+
+    // spread chromSize as evenly as possible: the first 'longSegments' segments get one extra base
+    const unsigned shortLength(chromSize/segmentCount);
+    const unsigned longSegments(chromSize%segmentCount);
+    startPos.clear();
+    unsigned nextStart(0);
+    for (unsigned segmentIndex(0); segmentIndex<segmentCount; ++segmentIndex)
+    {
+        assert(nextStart < chromSize);
+        startPos.push_back(nextStart);
+        nextStart = std::min(chromSize, nextStart + shortLength + ((segmentIndex<longSegments) ? 1u : 0u));
+    }
+}
+
+/// estimate the depth of chromosome chromName from the reads in statsAlignmentFile; the chromosome is
+/// sampled in segments until the depth estimate converges, the cycle limit is hit or no reads remain
+double
+readChromDepthFromAlignment(
+    const std::string& statsAlignmentFile,
+    const std::string& chromName)
+{
+    bam_streamer read_stream(statsAlignmentFile.c_str());
+
+    const bam_hdr_t& header(read_stream.get_header());
+    const bam_header_info bamHeader(header);
+
+    const auto& chromToIndex(bamHeader.chrom_to_index);
+    const auto chromIter(chromToIndex.find(chromName));
+    if (chromIter == chromToIndex.end())
+    {
+        using namespace illumina::common;
+
+        std::ostringstream oss;
+        oss << "ERROR: Can't find chromosome name '" << chromName << "' in BAM/CRAM file: '" << statsAlignmentFile << "'\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    const int32_t chromIndex(chromIter->second);
+
+    const unsigned chromSize(bamHeader.chrom_data[chromIndex].length);
+    unsigned segmentSize(2000000); // starting segment length, doubled below until at most 20 segments remain
+    std::vector<unsigned> segmentStartPos;
+
+    static const unsigned maxSSLoop(1000);
+    for (unsigned i=0; i<=maxSSLoop; ++i)
+    {
+        assert(i < maxSSLoop);
+        getChromSegments(chromSize, segmentSize, segmentStartPos);
+        if (segmentStartPos.size() <= 20) break;
+
+        const unsigned lastSegmentSize(segmentSize);
+        segmentSize *= 2;
+        assert(segmentSize > lastSegmentSize); //overflow guard
+    }
+
+    const unsigned totalSegments(segmentStartPos.size());
+
+    std::vector<unsigned> segmentHeadPos = segmentStartPos; // per segment: position at which the next cycle resumes reading
+    std::vector<bool> segmentIsEmpty(totalSegments,false);
+
+    ChromDepthTracker cdTracker;
+
+#ifdef DEBUG_DPS
+    log_os << "INFO: Chrom depth requesting bam region starting from: chrid: " << chromIndex << "\n";
+    log_os << "\tchromSize: " << chromSize << "\n";
+    for (const auto startPos : segmentStartPos)
+    {
+        log_os << "\tstartPos: " << startPos << "\n";
+    }
+#endif
+
+    // loop through segments until convergence criteria are met, or we run out of data:
+    static const unsigned maxCycle(10);
+    bool isFinished(false);
+    for (unsigned cycleIndex(0); cycleIndex<maxCycle; cycleIndex++)
+    {
+#ifdef DEBUG_DPS
+        log_os << "starting cycle: " << cycleIndex << "\n";
+#endif
+        bool isEmpty(true);
+        for (unsigned segmentIndex(0); segmentIndex<totalSegments; segmentIndex++)
+        {
+#ifdef DEBUG_DPS
+            log_os << "starting segment: " << segmentIndex << "\n";
+#endif
+            if (segmentIsEmpty[segmentIndex]) continue;
+
+            const int32_t startPos(segmentHeadPos[segmentIndex]);
+            const int32_t endPos(((segmentIndex+1)<totalSegments) ? segmentStartPos[segmentIndex+1]: chromSize);
+#ifdef DEBUG_DPS
+            log_os << "scanning region: " << startPos << "," << endPos << "\n";
+#endif
+            read_stream.set_new_region(chromIndex,startPos,endPos);
+
+            cdTracker.setNewRegion();
+
+            static const unsigned targetSegmentReadCount=40000;
+            static const int32_t minSpan(10000);
+            unsigned segmentReadCount(0);
+            while (read_stream.next())
+            {
+                // not allowed to test convergence until we've cycled through all segments once
+                if ((cycleIndex>0) && cdTracker.isDepthConverged())
+                {
+                    isFinished=true;
+                    break;
+                }
+
+                const bam_record& bamRead(*(read_stream.get_record_ptr()));
+                const int32_t readPos(bamRead.pos()-1);
+                if (readPos<startPos) continue; // skip reads that start before the resume position
+
+                segmentReadCount++;
+
+                if (readPos >= static_cast<int32_t>(segmentHeadPos[segmentIndex]))
+                {
+                    // cycle through to next segment:
+                    // doing this here ensures that we only cycle-out at the end of a position so that
+                    // no data is skipped if we come back to this segment again:
+                    if ((segmentReadCount > targetSegmentReadCount) && ((readPos-startPos) >= minSpan))
+                    {
+                        segmentHeadPos[segmentIndex] = readPos;
+                        break;
+                    }
+                    else
+                    {
+                        segmentHeadPos[segmentIndex] = readPos+1;
+                    }
+                }
+
+                // apply all filters:
+                if (bamRead.is_unmapped()) continue;
+
+                cdTracker.addRead(bamRead);
+
+                if (! cdTracker.isDepthCountCheck()) continue;
+
+                // check convergence
+                cdTracker.updateDepthConvergenceTest();
+            }
+
+            if (segmentReadCount>0)
+            {
+                isEmpty=false;
+            }
+            else
+            {
+                segmentIsEmpty[segmentIndex] = true; // nothing left to read here, skip this segment in later cycles
+            }
+        }
+
+        if (isFinished || isEmpty) break;
+    }
+
+    cdTracker.finalize();
+
+    return cdTracker.getDepth();
+}
diff --git a/src/c++/lib/manta/ReadChromDepthUtil.hh b/src/c++/lib/manta/ReadChromDepthUtil.hh
new file mode 100644
index 0000000..7cf31b9
--- /dev/null
+++ b/src/c++/lib/manta/ReadChromDepthUtil.hh
@@ -0,0 +1,36 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include <string>
+
+
+/// fast chrom depth estimator for BAM/CRAM files
+///
+/// return average chromosome depth
+double
+readChromDepthFromAlignment(
+    const std::string& statsAlignmentFile,
+    const std::string& chromName);
diff --git a/src/c++/lib/manta/ReadGroupLabel.cpp b/src/c++/lib/manta/ReadGroupLabel.cpp
new file mode 100644
index 0000000..75659dd
--- /dev/null
+++ b/src/c++/lib/manta/ReadGroupLabel.cpp
@@ -0,0 +1,38 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/ReadGroupLabel.hh"
+
+#include <iostream>
+
+
+
+/// write a human-readable description of \p rgl to \p os; both labels are
+/// single-quoted and the quotes are balanced, so callers need not close them
+std::ostream&
+operator<<(std::ostream& os, const ReadGroupLabel& rgl)
+{
+    os << "read group '" << rgl.rgLabel << "' in bam file '" << rgl.bamLabel << "'";
+    return os;
+}
diff --git a/src/c++/lib/manta/ReadGroupLabel.hh b/src/c++/lib/manta/ReadGroupLabel.hh
new file mode 100644
index 0000000..2807a0a
--- /dev/null
+++ b/src/c++/lib/manta/ReadGroupLabel.hh
@@ -0,0 +1,115 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/compat_util.hh"
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+#include <iosfwd>
+
+
+struct ReadGroupLabel
+{
+    /// if isCopyPtrs is true the strings are copied (strdup) and freed by
+    /// the object; if false the client must keep both pointers valid over
+    /// the lifetime of the label. Both labels are asserted to be non-NULL:
+    explicit
+    ReadGroupLabel(
+        const char* bamLabelInit,
+        const char* rgLabelInit,
+        const bool isCopyPtrsInit = true) :
+        isCopyPtrs(isCopyPtrsInit),
+        bamLabel((isCopyPtrs && (NULL != bamLabelInit)) ? strdup(bamLabelInit) : bamLabelInit),
+        rgLabel((isCopyPtrs && (NULL != rgLabelInit)) ? strdup(rgLabelInit) : rgLabelInit)
+    {
+        assert(NULL != bamLabel);
+        assert(NULL != rgLabel);
+    }
+
+    ReadGroupLabel(const ReadGroupLabel& rhs) :
+        isCopyPtrs(rhs.isCopyPtrs),
+        bamLabel(isCopyPtrs ? strdup(rhs.bamLabel) : rhs.bamLabel),
+        rgLabel(isCopyPtrs ? strdup(rhs.rgLabel) : rhs.rgLabel)
+    {}
+
+    ReadGroupLabel&
+    operator=(const ReadGroupLabel& rhs)
+    {
+        if (this == &rhs) return *this;
+        clear(); // release strings owned under the current isCopyPtrs setting
+        isCopyPtrs = rhs.isCopyPtrs;
+        bamLabel = (isCopyPtrs ? strdup(rhs.bamLabel) : rhs.bamLabel);
+        rgLabel = (isCopyPtrs ? strdup(rhs.rgLabel) : rhs.rgLabel);
+        return *this;
+    }
+
+public:
+
+    ~ReadGroupLabel()
+    {
+        clear();
+    }
+
+    /// order by bamLabel, then by rgLabel (both compared with strcmp, so neither may be NULL):
+    bool
+    operator<(
+        const ReadGroupLabel& rhs) const
+    {
+        const int scval(strcmp(bamLabel,rhs.bamLabel));
+        if (scval < 0) return true;
+        if (scval == 0)
+        {
+            return (strcmp(rgLabel,rhs.rgLabel) < 0);
+        }
+
+        return false;
+    }
+
+private:
+    void
+    clear()
+    {
+        if (isCopyPtrs) // only strings this object copied are freed
+        {
+            if (NULL != bamLabel) free(const_cast<char*>(bamLabel));
+            if (NULL != rgLabel) free(const_cast<char*>(rgLabel));
+        }
+    }
+
+    bool isCopyPtrs; ///< true if the label strings were copied by (and are freed by) this object
+
+public:
+    const char* bamLabel;
+    const char* rgLabel;
+};
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const ReadGroupLabel& rgl);
diff --git a/src/c++/lib/manta/ReadGroupStats.hh b/src/c++/lib/manta/ReadGroupStats.hh
new file mode 100644
index 0000000..861b8e4
--- /dev/null
+++ b/src/c++/lib/manta/ReadGroupStats.hh
@@ -0,0 +1,53 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Bret Barnes, Xiaoyu Chen
+///
+
+#pragma once
+
+#include "blt_util/SizeDistribution.hh"
+#include "common/ReadPairOrient.hh"
+
+
+/// Read pair insert stats can be computed for each sample or read group, this
+/// class represents the statistics for one group:
+///
+struct ReadGroupStats
+{
+private:
+    friend class boost::serialization::access;
+    template<class Archive>
+    void serialize(
+        Archive& ar,
+        const unsigned /*version*/)
+    {
+        ar& boost::serialization::make_nvp("fragmentSizeDistribution", fragStats);
+        ar& boost::serialization::make_nvp("pairOrientation", relOrients);
+    }
+
+    ///////////////////////////// data:
+public:
+    SizeDistribution fragStats;
+    ReadPairOrient relOrients;
+};
+
+BOOST_CLASS_IMPLEMENTATION(ReadGroupStats, boost::serialization::object_serializable)
diff --git a/src/c++/lib/manta/ReadGroupStatsSet.cpp b/src/c++/lib/manta/ReadGroupStatsSet.cpp
new file mode 100644
index 0000000..6ae1f86
--- /dev/null
+++ b/src/c++/lib/manta/ReadGroupStatsSet.cpp
@@ -0,0 +1,147 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "ReadGroupStatsSet.hh"
+
+#include "blt_util/log.hh"
+#include "blt_util/io_util.hh"
+#include "blt_util/parse_util.hh"
+#include "blt_util/string_util.hh"
+
+// workaround intel compiler boost warnings:
+#include "boost/config.hpp"
+#ifdef BOOST_INTEL_CXX_VERSION
+#pragma warning push
+#pragma warning(disable:1944)
+#endif
+
+#include "boost/archive/xml_oarchive.hpp"
+#include "boost/archive/xml_iarchive.hpp"
+
+#ifdef BOOST_INTEL_CXX_VERSION
+#pragma warning pop
+#endif
+
+#include "boost/serialization/map.hpp"
+#include "boost/serialization/string.hpp"
+#include "boost/serialization/vector.hpp"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+
+
+/// this struct exists for the sole purpose of xml output:
+struct ReadGroupStatsExporter
+{
+    template<class Archive>
+    void serialize(
+        Archive& ar,
+        const unsigned /*version*/)
+    {
+#ifdef READ_GROUPS
+        ar& boost::serialization::make_nvp("bamFile", bamFile);
+        ar& boost::serialization::make_nvp("readGroup", readGroup);
+#else
+        ar& boost::serialization::make_nvp("groupLabel", bamFile);
+#endif
+        ar& boost::serialization::make_nvp("groupStats", groupStats);
+    }
+
+    std::string bamFile;
+    std::string readGroup;
+    ReadGroupStats groupStats;
+};
+
+BOOST_CLASS_IMPLEMENTATION(ReadGroupStatsExporter, boost::serialization::object_serializable)
+
+
+
+/// Copy every (label, stats) entry of \p rhs into this set. A label that is
+/// already present here is fatal: an error is written to log_os and the
+/// process exits with EXIT_FAILURE.
+void
+ReadGroupStatsSet::
+merge(const ReadGroupStatsSet& rhs)
+{
+    for (unsigned groupIndex(0), groupCount(rhs.size()); groupIndex != groupCount; ++groupIndex)
+    {
+        const ReadGroupLabel& label(rhs.getKey(groupIndex));
+        if (_group.test_key(label))
+        {
+            log_os << "ERROR: Can't merge stats set objects with repeated key: '" << label << "'\n";
+            exit(EXIT_FAILURE);
+        }
+        setStats(label, rhs.getStats(groupIndex));
+    }
+}
+
+
+
+/// Write all read group stats to \p filename as a boost XML archive: a
+/// "numGroups" count followed by one "groupStats_<index>" element per group.
+void
+ReadGroupStatsSet::
+save(const char* filename) const
+{
+    assert(NULL != filename);
+    std::ofstream outStream(filename);
+    boost::archive::xml_oarchive archive(outStream);
+
+    const unsigned groupCount(size());
+    archive << boost::serialization::make_nvp("numGroups", groupCount);
+    ReadGroupStatsExporter exporter;
+    for (unsigned groupIndex(0); groupIndex<groupCount; ++groupIndex)
+    {
+        const KeyType& label(getKey(groupIndex));
+        exporter.bamFile = label.bamLabel;
+        exporter.readGroup = label.rgLabel;
+        exporter.groupStats = getStats(groupIndex);
+        std::ostringstream tag;
+        tag << "groupStats_" << groupIndex;
+        archive << boost::serialization::make_nvp(tag.str().c_str(), exporter);
+    }
+}
+
+
+
+/// Replace the contents of this set with the stats read from the boost XML
+/// archive \p filename (a "numGroups" count, then one record per group).
+void
+ReadGroupStatsSet::
+load(const char* filename)
+{
+    assert(NULL != filename);
+    clear();
+    std::ifstream ifs(filename);
+    boost::archive::xml_iarchive ia(ifs);
+
+    unsigned numGroups(0); // unsigned and initialized, matching what save() writes
+    ia >> boost::serialization::make_nvp("numGroups", numGroups);
+    ReadGroupStatsExporter se;
+    for (unsigned i(0); i<numGroups; ++i)
+    {
+        // NOTE(review): tag differs from the "groupStats_<i>" that save() writes; confirm it is not checked
+        ia >> boost::serialization::make_nvp("bogus", se);
+        setStats(KeyType(se.bamFile.c_str(), se.readGroup.c_str()), se.groupStats);
+    }
+}
+
diff --git a/src/c++/lib/manta/ReadGroupStatsSet.hh b/src/c++/lib/manta/ReadGroupStatsSet.hh
new file mode 100644
index 0000000..3d3d833
--- /dev/null
+++ b/src/c++/lib/manta/ReadGroupStatsSet.hh
@@ -0,0 +1,115 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+#include "blt_util/id_map.hh"
+#include "manta/ReadGroupLabel.hh"
+#include "manta/ReadGroupStats.hh"
+
+#include "boost/optional.hpp"
+
+#include <iosfwd>
+#include <string>
+
+
+/// \brief manages multiple read_group_stats
+///
+struct ReadGroupStatsSet
+{
+    typedef ReadGroupLabel KeyType;
+
+    bool
+    empty() const
+    {
+        return _group.empty();
+    }
+
+    unsigned
+    size() const
+    {
+        return _group.size();
+    }
+
+    /// \brief get the index of a read group
+    ///
+    /// the index can be used for fast lookup of the
+    /// stats for that group
+    ///
+    /// if the group does not exist, the returned value
+    /// evaluates to false per boost::optional
+    ///
+    /// Each read group is identified as a combination of a bam filename and
+    /// an RG tag label. An empty label refers to the "default" read group
+    /// for the file (all records that had no RG tag).
+    boost::optional<unsigned>
+    getGroupIndex(
+        const ReadGroupLabel& rgLabel) const
+    {
+        return _group.get_optional_id(rgLabel);
+    }
+
+    /// get stats associated with index
+    const ReadGroupStats&
+    getStats(
+        const unsigned groupIndex) const
+    {
+        return _group.get_value(groupIndex);
+    }
+
+    const KeyType&
+    getKey(
+        const unsigned groupIndex) const
+    {
+        return _group.get_key(groupIndex);
+    }
+
+    /// set stats for the read group identified by rgLabel
+    void
+    setStats(
+        const ReadGroupLabel& rgLabel,
+        const ReadGroupStats& rps)
+    {
+        _group.insert(rgLabel,rps);
+    }
+
+    /// merge in the contents of another stats set object:
+    void
+    merge(
+        const ReadGroupStatsSet& rhs);
+
+    void
+    save(
+        const char* filename) const;
+
+    void
+    load(
+        const char* filename);
+
+private:
+    void
+    clear()
+    {
+        _group.clear();
+    }
+
+    id_map<KeyType, ReadGroupStats> _group;
+};
+
diff --git a/src/c++/lib/manta/ReadGroupStatsUtil.cpp b/src/c++/lib/manta/ReadGroupStatsUtil.cpp
new file mode 100644
index 0000000..fbec695
--- /dev/null
+++ b/src/c++/lib/manta/ReadGroupStatsUtil.cpp
@@ -0,0 +1,813 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Bret Barnes, Xiaoyu Chen
+///
+
+#include "manta/ReadGroupStatsUtil.hh"
+
+#include "blt_util/log.hh"
+#include "blt_util/ReadKey.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/bam_record_util.hh"
+#include "htsapi/bam_streamer.hh"
+#include "manta/ReadGroupLabel.hh"
+#include "manta/SVLocusScanner.hh"
+
+#include <array>
+#include <iostream>
+#include <set>
+#include <sstream>
+#include <vector>
+
+
+
+/// Convergence test between two fragment size distributions.
+///
+/// The distributions match if, at every probed quantile (0.05, 0.15, ...),
+/// the quantile values differ by less than 1 and the cdf values evaluated at
+/// that quantile of \p pss2 differ by less than 0.001.
+static
+bool
+isStatSetMatch(
+    const SizeDistribution& pss1,
+    const SizeDistribution& pss2)
+{
+    static const float maxCdfDelta(0.001f);
+
+    float quantileProb(0.05f);
+    while (quantileProb < 1)
+    {
+        // the quantile values themselves must agree:
+        if (std::abs(pss1.quantile(quantileProb) - pss2.quantile(quantileProb)) >= 1) return false;
+
+        // and so must the cdf at the corresponding fragment size:
+        const int quantileSize(pss2.quantile(quantileProb));
+        if (std::abs(pss1.cdf(quantileSize) - pss2.cdf(quantileSize)) >= maxCdfDelta) return false;
+
+        quantileProb += 0.1f;
+    }
+    return true;
+}
+
+
+
+/// Get the relative orientation index of a read pair, with the position and
+/// strand of the record flagged is_first() always passed first.
+/// This produces a useful result only when both reads align to the same
+/// chromosome.
+static
+PAIR_ORIENT::index_t
+getRelOrient(
+    const bam_record& br)
+{
+    const pos_t readPos(br.pos());
+    const bool isReadFwd(br.is_fwd_strand());
+    const pos_t matePos(br.mate_pos());
+    const bool isMateFwd(br.is_mate_fwd_strand());
+
+    // this record is flagged first, so its own values lead:
+    if (br.is_first()) return PAIR_ORIENT::get_index(readPos,isReadFwd,matePos,isMateFwd);
+
+    // otherwise the mate's values lead:
+    return PAIR_ORIENT::get_index(matePos,isMateFwd,readPos,isReadFwd);
+}
+
+
+
+/// get the absolute insert size from a bam record, with its low decimal
+/// digits zeroed so that the part kept never exceeds 1000 (e.g. 12345
+/// becomes 12300); sizes of 1000 or less are returned unchanged
+static
+unsigned
+getSimplifiedFragSize(
+    const bam_record& bamRead)
+{
+    unsigned fragSize(std::abs(bamRead.template_size()));
+
+    // strip trailing decimal digits until the value is no larger than 1000,
+    // tracking the power of ten needed to restore the original magnitude:
+    unsigned scale(1);
+    for (; fragSize>1000; fragSize /= 10)
+    {
+        scale *= 10;
+    }
+
+    return (fragSize * scale);
+}
+
+
+
+/// track pair orientation so that a consensus can be found for a read group
+///
+struct ReadGroupOrientTracker
+{
+    ReadGroupOrientTracker(
+        const char* bamLabel,
+        const char* rgLabel) :
+        _isFinalized(false),
+        _totalOrientCount(0),
+        _rgLabel(bamLabel,rgLabel)
+    {
+        std::fill(_orientCount.begin(),_orientCount.end(),0);
+    }
+
+    void
+    addOrient(
+        const PAIR_ORIENT::index_t ori)
+    {
+        static const unsigned maxOrientCount(100000);
+        if (_totalOrientCount >= maxOrientCount) return;
+        if (ori == PAIR_ORIENT::UNKNOWN) return;
+        addOrientImpl(ori);
+    }
+
+    const ReadPairOrient&
+    getConsensusOrient()
+    {
+        finalize();
+        return _finalOrient;
+    }
+
+private:
+
+    void
+    addOrientImpl(
+        const PAIR_ORIENT::index_t ori)
+    {
+        assert(! _isFinalized);
+        assert((ori>=0) && (ori<PAIR_ORIENT::SIZE));
+
+        _orientCount[ori]++;
+        _totalOrientCount++;
+    }
+
+    void
+    finalize()
+    {
+        if (_isFinalized) return;
+
+        bool isMaxIndex(false);
+        unsigned maxIndex(0);
+        for (unsigned i(0); i<_orientCount.size(); ++i)
+        {
+            if ((! isMaxIndex) || (_orientCount[i] > _orientCount[maxIndex]))
+            {
+                isMaxIndex=true;
+                maxIndex=i;
+            }
+        }
+
+        assert(isMaxIndex);
+
+        _finalOrient.setVal(maxIndex);
+
+        {
+            // make sure there's a dominant consensus orientation and that we have a minimum number of samples:
+            static const unsigned minCount(100);
+            static const float minMaxFrac(0.9f);
+
+            using namespace illumina::common;
+
+            if (_totalOrientCount < minCount)
+            {
+                std::ostringstream oss;
+                oss << "ERROR: Too few observations (" << _totalOrientCount << ") to determine pair orientation for " << _rgLabel << "'\n";
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+
+            const unsigned minMaxCount(static_cast<unsigned>(minMaxFrac*_totalOrientCount));
+            if (_orientCount[maxIndex] < minMaxCount)
+            {
+                const unsigned maxPercent((_orientCount[maxIndex]*100)/_totalOrientCount);
+                std::ostringstream oss;
+                oss << "ERROR: Can't determine consensus pair orientation of " << _rgLabel << ".\n"
+                    << "\tThe most frequent orientation is '" << _finalOrient << "' (" << maxPercent << "% of " << _totalOrientCount << " total observations)\n";
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+        }
+
+        _isFinalized=true;
+    }
+
+    bool _isFinalized;
+    unsigned _totalOrientCount;
+    const ReadGroupLabel _rgLabel;
+    std::array<unsigned,PAIR_ORIENT::SIZE> _orientCount;
+
+    ReadPairOrient _finalOrient;
+};
+
+
+
+
+/// all data required to build ReadGroupStats during estimation from the bam file
+///
+/// ultimately the only information we want to keep is the ReadGroupStats object itself,
+/// which can be exported from this object
+///
+struct ReadGroupTracker
+{
+    explicit
+    ReadGroupTracker(
+        const char* bamLabel = nullptr,
+        const char* rgLabel = nullptr) :
+        _isFinalized(false),
+        _rgLabel(bamLabel, rgLabel),
+        _orientInfo(bamLabel, rgLabel),
+        _isChecked(false),
+        _isInsertSizeConverged(false)
+    {}
+
+    void
+    addOrient(
+        const PAIR_ORIENT::index_t ori)
+    {
+        assert(! _isFinalized);
+
+        _orientInfo.addOrient(ori);
+    }
+
+    void
+    addInsertSize(const int size)
+    {
+        assert(! _isFinalized);
+
+        _stats.fragStats.addObservation(size);
+    }
+
+    unsigned
+    insertSizeObservations() const
+    {
+        return _stats.fragStats.totalObservations();
+    }
+
+    bool
+    isInsertSizeCountCheck()
+    {
+        static const unsigned statsCheckCnt(100000);
+        const bool isCheck((insertSizeObservations() % statsCheckCnt) == 0);
+        if (isCheck) _isChecked=true;
+        return isCheck;
+    }
+
+    bool
+    isChecked() const
+    {
+        return (_isChecked || isInsertSizeConverged());
+    }
+
+    void
+    clearChecked()
+    {
+        _isChecked=false;
+    }
+
+    bool
+    isInsertSizeConverged() const
+    {
+        return _isInsertSizeConverged;
+    }
+
+    void
+    updateInsertSizeConvergenceTest()
+    {
+        // check convergence
+        if (_oldInsertSize.totalObservations() > 0)
+        {
+            _isInsertSizeConverged=isStatSetMatch(_oldInsertSize, _stats.fragStats);
+        }
+        _oldInsertSize = _stats.fragStats;
+    }
+
+    /// get a const ref of the stats; finalize() must already have been called:
+    const ReadGroupStats&
+    getStats() const
+    {
+        assert(_isFinalized);
+        return _stats;
+    }
+
+    void
+    finalize()
+    {
+        if (_isFinalized) return;
+
+        // finalize pair orientation:
+        _stats.relOrients = _orientInfo.getConsensusOrient();
+
+        if (_stats.relOrients.val() != PAIR_ORIENT::Rp)
+        {
+            using namespace illumina::common;
+
+            std::ostringstream oss;
+            oss << "ERROR: Unexpected consensus read orientation (" << _stats.relOrients << ") for " << _rgLabel << "\n"
+                << "\tManta currently handles paired-end (FR) reads only.\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+
+        // finalize insert size distro:
+        if (! isInsertSizeConverged())
+        {
+            if (_stats.fragStats.totalObservations() < 100)
+            {
+                using namespace illumina::common;
+
+                std::ostringstream oss;
+                oss << "ERROR: Can't generate pair statistics for " << _rgLabel << "\n"
+                    << "\tTotal observed read pairs: " << insertSizeObservations() << "\n";
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+            else if (! isInsertSizeCountCheck())
+            {
+                updateInsertSizeConvergenceTest();
+            }
+
+            if (! isInsertSizeConverged())
+            {
+                log_os << "WARNING: read pair statistics did not converge for " << _rgLabel << "\n";
+            }
+        }
+
+        // final step before saving is to cut off the extreme end of the fragment size distribution; this
+        // is similar to some aligners' proper-pair bit definition (3x the standard mean, etc.)
+        static const float filterQuant(0.9995f);
+        _stats.fragStats.filterObservationsOverQuantile(filterQuant);
+
+        _isFinalized=true;
+    }
+
+private:
+
+    bool _isFinalized;
+    const ReadGroupLabel _rgLabel;
+    ReadGroupOrientTracker _orientInfo;
+
+    bool _isChecked;
+    bool _isInsertSizeConverged;
+    SizeDistribution _oldInsertSize; // previous fragment distribution is stored to determine convergence
+
+    ReadGroupStats _stats;
+};
+
+
+
+struct ReadAlignFilter
+{
+    /// use only the most conservative alignments to generate fragment stats --
+    /// filter reads containing any cigar types besides MATCH with optional trailing soft-clip
+    bool
+    isFilterRead(
+        const bam_record& bamRead)
+    {
+        using namespace ALIGNPATH;
+
+        bam_cigar_to_apath(bamRead.raw_cigar(), bamRead.n_cigar(), _apath);
+
+        if (! bamRead.is_fwd_strand()) std::reverse(_apath.begin(),_apath.end());
+
+        bool isMatched(false);
+        bool isClipped(false);
+        for (const path_segment& ps : _apath)
+        {
+            if (is_segment_align_match(ps.type))
+            {
+                if (isClipped) return true;
+                isMatched = true;
+            }
+            else if (ps.type == SOFT_CLIP)
+            {
+                isClipped = true;
+            }
+            else
+            {
+                return true;
+            }
+        }
+        return (! isMatched);
+    }
+
+private:
+    ALIGNPATH::path_t _apath;
+};
+
+
+
+/// this object handles filtration of reads which are:
+///
+/// 1. not downstream or (if both reads start at same position) not the second in order in the bam file
+/// 2. part of a high depth pileup region
+///
+struct ReadPairDepthFilter
+{
+    bool
+    isFilterRead(
+        const bam_record& bamRead)
+    {
+        static const unsigned maxPosCount(1);
+
+        if (bamRead.target_id() != _lastTargetId)
+        {
+            _goodMates.clear();
+            _lastTargetId=bamRead.target_id();
+            _posCount=0;
+            _lastPos = bamRead.pos();
+        }
+        else if (bamRead.pos() != _lastPos)
+        {
+            _posCount=0;
+            _lastPos = bamRead.pos();
+        }
+
+        // Assert only two reads per fragment
+        const unsigned readNum(bamRead.is_first() ? 1 : 2);
+        assert(bamRead.is_second() == (readNum == 2));
+
+        // sample each read pair once by sampling stats from
+        // downstream read only, or whichever read is encountered
+        // second if the read and its mate start at the same position:
+        const bool isDownstream(bamRead.pos() > bamRead.mate_pos());
+        const bool isSamePos(bamRead.pos() == bamRead.mate_pos());
+
+        if (isDownstream || isSamePos)
+        {
+            const int mateReadNo( bamRead.is_first() ? 2 : 1);
+            const ReadKey mateKey(bamRead.qname(), mateReadNo, false);
+
+            mateMap_t::iterator i(_goodMates.find(mateKey));
+
+            if (i == _goodMates.end())
+            {
+                if (isDownstream) return true;
+            }
+            else
+            {
+                _goodMates.erase(i);
+                return false;
+            }
+        }
+
+        // to prevent high-depth pileups from overly biasing the
+        // read stats, we only take maxPosCount read pairs from each start
+        // pos. by not inserting a key in goodMates, we also filter
+        // the downstream mate:
+        if (_posCount>=maxPosCount) return true;
+        ++_posCount;
+
+        // crude mechanism to manage total set memory
+        static const unsigned maxMateSetSize(100000);
+        if (_goodMates.size() > maxMateSetSize) _goodMates.clear();
+
+        _goodMates.insert(ReadKey(bamRead));
+
+        return true;
+    }
+
+private:
+    typedef std::set<ReadKey> mateMap_t;
+
+    unsigned _posCount = 0;
+    mateMap_t _goodMates;
+
+    int _lastTargetId = 0;
+    int _lastPos = 0;
+};
+
+
+
+/// Combined read filter used when sampling insert size stats.
+struct CoreInsertStatsReadFilter
+{
+    /// Return true if \p bamRead should be skipped.
+    /// The tests short-circuit in a fixed order, so the stateful pair filter
+    /// only ever sees reads that have passed every other test.
+    bool
+    isFilterRead(
+        const bam_record& bamRead)
+    {
+        // common categories of undesirable reads:
+        const bool isUndesirable(
+            SVLocusScanner::isReadFilteredCore(bamRead) ||
+            bamRead.isNonStrictSupplement() ||
+            (! is_mapped_chrom_pair(bamRead)) ||
+            (bamRead.map_qual()==0) ||
+            bamRead.isSASplit());   // split reads with an SA tag
+        if (isUndesirable) return true;
+
+        // remove alignments other than {X}M({Y}S)? (or reverse for reverse strand)
+        if (alignFilter.isFilterRead(bamRead)) return true;
+
+        // filter out upstream reads and high depth regions:
+        return pairFilter.isFilterRead(bamRead);
+    }
+
+    ReadAlignFilter alignFilter;
+    ReadPairDepthFilter pairFilter;
+};
+
+
+
+#if 0
+/// samples around various short segments of the genome so
+/// that stats generation isn't biased towards a single region
+struct GenomeSampler
+{
+    GenomeSampler(
+        const std::string& bamFile) :
+        _readStream(bamFile.c_str()),
+        _chromCount(0),
+        _isInitRegion(false),
+        _isInitRecord(false),
+        _isActiveChrom(true),
+        _currentChrom(0)
+    {
+        const bam_hdr_t& header(*_readStream.get_header());
+
+        _chromCount = (header.n_targets);
+
+        _isActiveChrom = (_chromCount > 0);
+
+        _chromSize.resize(_chromCount,0);
+        _chromHighestPos.resize(_chromCount,-1);
+
+        for (int32_t i(0); i<_chromCount; ++i)
+        {
+            _chromSize[i] = (header.target_len[i]);
+        }
+
+        init();
+    }
+
+    /// advance to next region of the genome, this must be called before using nextRecord():
+    bool
+    nextRegion()
+    {
+        if (! _isActiveChrom) return false;
+
+        _isInitRegion=true;
+        _isInitRecord=false;
+        return true;
+    }
+
+    /// advance to next record in the current region, this must be called before using getBamRecord():
+    bool
+    nextRecord()
+    {
+        assert(_isInitRegion);
+
+        if (! _isActiveChrom) return false;
+
+        _isInitRecord=true;
+        return true;
+    }
+
+    /// access current bam record
+    const bam_record&
+    getBamRecord()
+    {
+        assert(_isInitRegion);
+        assert(_isInitRecord);
+
+        return *(_readStream.get_record_ptr());
+    }
+
+private:
+    void
+    init()
+    {
+        if (! _isActiveChrom) return;
+
+        const int32_t startPos(_chromHighestPos[chromIndex]+1);
+
+        _currentChrom=0;
+    }
+
+    bam_streamer _readStream;
+    int32_t _chromCount;
+    std::vector<int32_t> _chromSize;
+
+    std::vector<int32_t> _chromHighestPos;
+    bool _isInitRegion;
+    bool _isInitRecord;
+    bool _isActiveChrom; ///< this is used to track whether we've reached the end of all chromosomes
+    int32_t _currentChrom;
+};
+#endif
+
+
+
+/// owns one ReadGroupTracker per read group label, creating trackers on demand
+struct ReadGroupManager
+{
+    typedef std::map<ReadGroupLabel,ReadGroupTracker> RGMapType;
+
+    explicit
+    ReadGroupManager(
+        const std::string& statsBamFile) :
+        _isFinalized(false),
+        _statsBamFile(statsBamFile)
+    {}
+
+    /// get the tracker for the read group reported by getReadGroup() for this record
+    ReadGroupTracker&
+    getTracker(
+        const bam_record& bamRead)
+    {
+        const char* const rgName(getReadGroup(bamRead));
+        return getTracker(rgName);
+    }
+
+    /// get the tracker for the named read group, inserting a new tracker on first request
+    ReadGroupTracker&
+    getTracker(
+        const char* readGroup)
+    {
+        const char* const bamLabel(_statsBamFile.c_str());
+        const ReadGroupLabel lookupKey(bamLabel, readGroup, false);
+
+        const RGMapType::iterator found(_rgTracker.find(lookupKey));
+        if (found != _rgTracker.end()) return found->second;
+
+        const std::pair<RGMapType::iterator,bool> inserted(
+            _rgTracker.insert(std::make_pair(ReadGroupLabel(bamLabel, readGroup), ReadGroupTracker(bamLabel,readGroup))));
+
+        // the lookup above just missed, so this insertion must have taken place
+        assert(inserted.second);
+        return inserted.first->second;
+    }
+
+    /// true once every tracker reports isChecked(); the checked flags are all cleared before returning true
+    bool
+    isFinishedRegion()
+    {
+        for (RGMapType::iterator rgIter(_rgTracker.begin()); rgIter != _rgTracker.end(); ++rgIter)
+        {
+            if (! rgIter->second.isChecked()) return false;
+        }
+
+        for (RGMapType::iterator rgIter(_rgTracker.begin()); rgIter != _rgTracker.end(); ++rgIter)
+        {
+            rgIter->second.clearChecked();
+        }
+
+        return true;
+    }
+
+    /// true when every read group has either converged or exceeded the insert size observation cap
+    bool
+    isStopEstimation()
+    {
+        static const unsigned maxRecordCount(5000000);
+        for (RGMapType::value_type& rgEntry : _rgTracker)
+        {
+            if ((! rgEntry.second.isInsertSizeConverged()) && (! (rgEntry.second.insertSizeObservations()>maxRecordCount))) return false;
+        }
+        return true;
+    }
+
+    const RGMapType&
+    getMap()
+    {
+        finalize(); // trackers are finalized before the map is first handed out
+        return _rgTracker;
+    }
+
+private:
+
+    /// finalize every tracker; only the first call does any work
+    void
+    finalize()
+    {
+        if (! _isFinalized)
+        {
+            for (RGMapType::value_type& rgEntry : _rgTracker) rgEntry.second.finalize();
+            _isFinalized=true;
+        }
+    }
+
+    bool _isFinalized;
+    const std::string _statsBamFile;
+    RGMapType _rgTracker;
+};
+
+/// Collect read group stats (pair orientation and fragment size observations) from statsBamFile into rstats.
+/// Chromosomes are read in repeated passes, each resuming after the last position already read on that chromosome.
+void
+extractReadGroupStatsFromBam(
+    const std::string& statsBamFile,
+    ReadGroupStatsSet& rstats)
+{
+    bam_streamer read_stream(statsBamFile.c_str());
+
+    const bam_hdr_t& header(read_stream.get_header());
+    const int32_t chromCount(header.n_targets);
+    std::vector<int32_t> chromSize(chromCount,0);
+    std::vector<int32_t> chromHighestPos(chromCount,-1); // highest record position read so far per chromosome, -1 if none
+    for (int32_t i(0); i<chromCount; ++i)
+    {
+        chromSize[i] = (header.target_len[i]);
+    }
+
+    bool isStopEstimation(false);
+    bool isActiveChrom(true);
+
+    CoreInsertStatsReadFilter coreFilter;
+    ReadGroupManager rgManager(statsBamFile.c_str());
+
+#ifndef READ_GROUPS
+    static const char defaultReadGroup[] = "";
+    ReadGroupTracker& rgInfo(rgManager.getTracker(defaultReadGroup)); // without READ_GROUPS every read shares this one tracker
+#endif
+
+    while (isActiveChrom && (!isStopEstimation))
+    {
+        isActiveChrom=false; // set again below only if this pass reads a record at or beyond some chromosome's start position
+        for (int32_t chromIndex(0); chromIndex<chromCount; ++chromIndex)
+        {
+            if (isStopEstimation) break;
+
+            const int32_t startPos(chromHighestPos[chromIndex]+1); // resume just after the last position read on this chromosome
+#ifdef DEBUG_RPS
+            std::cerr << "INFO: Stats requesting bam region starting from: chrid: " << chromIndex << " start: " << startPos << "\n";
+#endif
+            if (startPos >= chromSize[chromIndex]) continue; // nothing left to request on this chromosome
+
+            read_stream.set_new_region(chromIndex,startPos,chromSize[chromIndex]);
+            while (read_stream.next())
+            {
+                const bam_record& bamRead(*(read_stream.get_record_ptr()));
+                if (bamRead.pos()<startPos) continue; // skip records positioned before the requested start
+
+                chromHighestPos[chromIndex]=bamRead.pos();
+                isActiveChrom=true;
+
+                if (coreFilter.isFilterRead(bamRead)) continue;
+
+#ifdef READ_GROUPS
+                ReadGroupTracker& rgInfo(rgManager.getTracker(bamRead));
+#endif
+
+                if (rgInfo.isInsertSizeConverged()) continue; // no further sampling for a converged read group
+
+                // get orientation stats before the final filter for innie reads below:
+                //
+                // we won't use anything but innie reads for insert size stats, but sampling
+                // orientation beforehand allows us to detect, e.g. a mate-pair library,
+                // so that we can blow-up with an informative error msg
+                //
+                const PAIR_ORIENT::index_t ori(getRelOrient(bamRead));
+                rgInfo.addOrient(ori);
+
+                // filter mapped innies on the same chrom
+                //
+                // note we don't rely on the proper pair bit because this already contains an
+                // arbitrary length filter and subjects the method to aligner specific variation
+                //
+                // TODO: ..note this locks-in standard ilmn orientation -- okay for now but this function needs major
+                // re-arrangement for mate-pair support, we could still keep independence from each aligner's proper
+                // pair decisions by estimating a fragment distro for each orientation and only keeping the one with
+                // the most samples
+                //
+                if (ori != PAIR_ORIENT::Rp) continue;
+
+                rgInfo.addInsertSize(getSimplifiedFragSize(bamRead));
+
+                if (! rgInfo.isInsertSizeCountCheck()) continue;
+
+                // check convergence
+                rgInfo.updateInsertSizeConvergenceTest();
+
+                if (! rgManager.isFinishedRegion()) continue; // stay on this chromosome until every tracker has been checked
+
+                isStopEstimation = rgManager.isStopEstimation();
+
+                // break from reading the current chromosome
+                break;
+            }
+        }
+    }
+
+    for (const ReadGroupManager::RGMapType::value_type& val : rgManager.getMap())
+    {
+        rstats.setStats(val.first, val.second.getStats());
+    }
+}
diff --git a/src/c++/lib/manta/ReadGroupStatsUtil.hh b/src/c++/lib/manta/ReadGroupStatsUtil.hh
new file mode 100644
index 0000000..416a03c
--- /dev/null
+++ b/src/c++/lib/manta/ReadGroupStatsUtil.hh
@@ -0,0 +1,35 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Bret Barnes, Xiaoyu Chen
+///
+
+#pragma once
+
+#include "manta/ReadGroupStatsSet.hh"
+
+#include <string>
+
+
+void
+extractReadGroupStatsFromBam(
+    const std::string& statsBamFile,
+    ReadGroupStatsSet& rstats);
diff --git a/src/c++/lib/manta/RemoteMateReadUtil.cpp b/src/c++/lib/manta/RemoteMateReadUtil.cpp
new file mode 100644
index 0000000..ba59087
--- /dev/null
+++ b/src/c++/lib/manta/RemoteMateReadUtil.cpp
@@ -0,0 +1,64 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+/// \author Chris Saunders
+///
+
+#include "RemoteMateReadUtil.hh"
+
+#include <cstdlib>
+
+
+
+bool
+isMateInsertionEvidenceCandidate(
+    const bam_record& bamRead,
+    const unsigned minMapq)
+{
+    // reject unpaired reads, reads flagged by isNonStrictSupplement(), pairs with an unmapped end and low MAPQ reads
+    if ((! bamRead.is_paired()) || bamRead.isNonStrictSupplement()) return false;
+    if (bamRead.is_unmapped()) return false;
+    if (bamRead.is_mate_unmapped()) return false;
+    if (bamRead.map_qual() < minMapq) return false;
+
+    // the read and its mate both need a non-negative chromosome id
+    if ((bamRead.target_id() < 0) || (bamRead.mate_target_id() < 0)) return false;
+
+    // a mate on another chromosome always qualifies, otherwise a minimum separation is required
+    if (bamRead.target_id() != bamRead.mate_target_id()) return true;
+    /// TODO: better candidate definition based on fragment size distro:
+    static const int minSize(10000);
+    return (std::abs(bamRead.pos()-bamRead.mate_pos()) >= minSize);
+}
+
+
+
+bool
+isMateInsertionEvidenceCandidate2(
+    const bam_record& bamRead,
+    const bool isSearchForLeftOpen,
+    const bool isSearchForRightOpen)
+{
+    // forward-strand reads are accepted for right-open searches, reverse-strand reads for left-open searches
+    const bool isFwdStrand(bamRead.is_fwd_strand());
+    return (isFwdStrand ? isSearchForRightOpen : isSearchForLeftOpen);
+}
diff --git a/src/c++/lib/manta/RemoteMateReadUtil.hh b/src/c++/lib/manta/RemoteMateReadUtil.hh
new file mode 100644
index 0000000..f25750e
--- /dev/null
+++ b/src/c++/lib/manta/RemoteMateReadUtil.hh
@@ -0,0 +1,85 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+#include "htsapi/bam_record.hh"
+
+#include <string>
+
+/// this is the first of 2 components to determine mate
+/// insert candidacy. Note that we assume base filtration
+/// (PCR dups, etc.) has already occurred.
+///
+bool
+isMateInsertionEvidenceCandidate(
+    const bam_record& bamRead,
+    const unsigned minMapq);
+
+
+bool
+isMateInsertionEvidenceCandidate2(
+    const bam_record& bamRead,
+    const bool isSearchForLeftOpen,
+    const bool isSearchForRightOpen);
+
+
+/// information recorded for reads where we need to grab the mate from a remote locus
+///
+/// typically these are chimeras with a MAPQ0 mate used to assemble a large insertion
+///
+struct RemoteReadInfo
+{
+    /// capture the mate ("remote") and local coordinates from the local read's BAM record
+    explicit
+    RemoteReadInfo(
+        const bam_record& bamRead)
+        : qname(bamRead.qname()),
+          readNo((bamRead.read_no()==1) ? 2 : 1),
+          tid(bamRead.mate_target_id()),
+          pos(bamRead.mate_pos() - 1),
+          localPos(bamRead.pos() - 1),
+          readSize(bamRead.read_size()),
+          isLocalFwd(bamRead.is_fwd_strand()),
+          isFound(false),
+          isUsed(false)
+    {}
+
+    /// order by mate chromosome id, then by mate position
+    bool
+    operator<(
+        const RemoteReadInfo& rhs) const
+    {
+        if (tid != rhs.tid) return (tid < rhs.tid);
+        return (pos < rhs.pos);
+    }
+
+    // fields describing the remote mate (qname is taken from the local record):
+    std::string qname;
+    int readNo;      // read number of the target, ie. the opposite of the local read's number
+    int tid;         // mate chromosome id
+    int pos;         // mate_pos() minus one
+    // fields taken from the local read:
+    int localPos;    // pos() minus one
+    int readSize;
+    bool isLocalFwd;
+    bool isFound;    // isFound and isUsed both start out false
+    bool isUsed;
+};
diff --git a/src/c++/lib/manta/SVBreakend.cpp b/src/c++/lib/manta/SVBreakend.cpp
new file mode 100644
index 0000000..3e09d00
--- /dev/null
+++ b/src/c++/lib/manta/SVBreakend.cpp
@@ -0,0 +1,57 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/SVBreakend.hh"
+
+#include <iostream>
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVBreakendLowResEvidence& sce)
+{
+    // one " <label>: <count>" field per evidence type, in enum order
+    os << "SVBreakendLowResEvidence:";
+    for (int evidenceType(0); evidenceType<SVEvidenceType::SIZE; ++evidenceType)
+    {
+        os << " " << SVEvidenceType::label(evidenceType)
+           << ": " << sce.getVal(evidenceType);
+    }
+
+    return os;
+}
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVBreakend& svb)
+{
+    // first line: interval and state label
+    os << "Breakend:" << " " << svb.interval << " " << SVBreakendState::label(svb.state) << "\n";
+    // second line: tab-indented low-resolution evidence counts
+    os << "\t" << svb.lowresEvidence << "\n";
+    return os;
+}
diff --git a/src/c++/lib/manta/SVBreakend.hh b/src/c++/lib/manta/SVBreakend.hh
new file mode 100644
index 0000000..756510d
--- /dev/null
+++ b/src/c++/lib/manta/SVBreakend.hh
@@ -0,0 +1,404 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/GenomeInterval.hh"
+
+#include <cassert>
+
+#include <array>
+#include <iosfwd>
+
+namespace SVEvidenceType
+{
+enum index_t
+{
+    PAIR,        ///< a pair observation based on both read BAM records
+    LOCAL_PAIR,  ///< a pair observation based on one read only
+    CIGAR,
+    SOFTCLIP,
+    SEMIALIGN,
+    SHADOW,
+    SPLIT_ALIGN,
+    UNKNOWN,     ///< temporary state
+    SIZE         ///< count of the enumerators above, not an evidence type
+};
+
+/// \brief Get the lower-case display label for an evidence type
+///
+/// \param i an index_t value other than SIZE
+/// \return the label; any other value trips the assert, and a null pointer is returned if asserts are compiled out
+inline
+const char*
+label(const int i)
+{
+    switch (i)
+    {
+    case PAIR:
+        return "pair";
+    case LOCAL_PAIR:
+        return "local_pair";
+    case CIGAR:
+        return "cigar";
+    case SOFTCLIP:
+        return "softclip";
+    case SEMIALIGN:
+        return "semialign";
+    case SHADOW:
+        return "shadow";
+    case SPLIT_ALIGN:
+        return "split_align";
+    case UNKNOWN:
+        return "unknown";
+    default:
+        assert(false && "Unknown SVCandidate evidence type");
+        return nullptr;
+    }
+}
+
+/// \brief Test for read-pair evidence
+///
+/// \return true for PAIR and LOCAL_PAIR, false for every other value
+inline
+bool
+isPairType(const int i)
+{
+    return ((i == PAIR) || (i == LOCAL_PAIR));
+}
+}
+
+
+
+/// per-breakend observation counts, one counter for each SVEvidenceType value
+struct SVBreakendLowResEvidence
+{
+    typedef SVBreakendLowResEvidence self_t;
+
+    /// all counts start at zero
+    SVBreakendLowResEvidence()
+    {
+        _evidence.fill(0);
+    }
+
+    /// copies the counts of rhs
+    SVBreakendLowResEvidence(
+        const self_t& rhs)
+        : _evidence(rhs._evidence)
+    {}
+
+    const self_t&
+    operator=(const self_t& rhs)
+    {
+        if (this != &rhs) _evidence = rhs._evidence;
+        return *this;
+    }
+
+    /// \return the count stored for evidence type i
+    unsigned
+    getVal(const int i) const
+    {
+        assert((i>=0) && (i<SVEvidenceType::SIZE));
+        return _evidence[i];
+    }
+
+    /// \return the sum of the counts over all evidence types
+    unsigned
+    getTotal() const
+    {
+        unsigned total(0);
+        for (const unsigned short typeCount : _evidence) total += typeCount;
+        return total;
+    }
+
+    /// reset every count to zero
+    void
+    clear()
+    {
+        _evidence.fill(0);
+    }
+
+    /// add count observations to evidence type i
+    void
+    add(const int i,
+        const unsigned count = 1)
+    {
+        assert((i>=0) && (i<SVEvidenceType::SIZE));
+        _evidence[i] += count;
+    }
+
+    /// add each count of rhs to the matching count of this object
+    void
+    merge(const self_t& rhs)
+    {
+        for (int typeIndex(0); typeIndex<SVEvidenceType::SIZE; ++typeIndex)
+        {
+            _evidence[typeIndex] += rhs._evidence[typeIndex];
+        }
+    }
+
+private:
+    // counters are unsigned short, so add() and merge() wrap around on overflow
+    std::array<unsigned short, SVEvidenceType::SIZE> _evidence;
+};
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVBreakendLowResEvidence& sce);
+
+
+
+
+
+namespace SVBreakendState
+{
+enum index_t
+{
+    UNKNOWN,    // Everything else not covered below
+    RIGHT_OPEN, // 5' side of region is mapped
+    LEFT_OPEN,  // 3' side of region is mapped
+    COMPLEX     // A typical small scale assembly locus -- something is happening in a small region,
+    // the event might be local to that region but we don't know
+};
+
+/// \return the enumerator name as a string; values outside the enum are also reported as "UNKNOWN"
+inline
+const char*
+label(const index_t idx)
+{
+    switch (idx)
+    {
+    case RIGHT_OPEN:
+        return "RIGHT_OPEN";
+    case LEFT_OPEN:
+        return "LEFT_OPEN";
+    case COMPLEX:
+        return "COMPLEX";
+    case UNKNOWN:
+    default:
+        return "UNKNOWN";
+    }
+}
+
+/// true for the two oriented states (RIGHT_OPEN, LEFT_OPEN), ie. a 'normal' breakend with a known orientation
+///
+/// a false return should typically be for a region targeted for local assembly, but without a specific
+/// variant hypothesis
+inline
+bool
+isSimpleBreakend(
+    const index_t idx)
+{
+    return ((RIGHT_OPEN==idx) || (LEFT_OPEN==idx));
+}
+
+/// true if both states are simple breakends with the same orientation
+inline
+bool
+isSameOrientation(
+    const index_t idx1,
+    const index_t idx2)
+{
+    const bool isBothSimple(isSimpleBreakend(idx1) && isSimpleBreakend(idx2));
+    return (isBothSimple && (idx1==idx2));
+}
+
+/// true if both states are simple breakends with different orientations
+inline
+bool
+isOppositeOrientation(
+    const index_t idx1,
+    const index_t idx2)
+{
+    const bool isBothSimple(isSimpleBreakend(idx1) && isSimpleBreakend(idx2));
+    return (isBothSimple && (idx1!=idx2));
+}
+
+/// true if the breakend which comes first is RIGHT_OPEN and the other one is LEFT_OPEN
+///
+/// \param isIdx1First if true idx1 is treated as the first breakend, otherwise idx2 is
+inline
+bool
+isInnies(
+    const bool isIdx1First,
+    const index_t idx1,
+    const index_t idx2)
+{
+    const index_t firstState(isIdx1First ? idx1 : idx2);
+    const index_t secondState(isIdx1First ? idx2 : idx1);
+    return ((firstState==RIGHT_OPEN) && (secondState==LEFT_OPEN));
+}
+
+/// true if the breakend which comes first is LEFT_OPEN and the other one is RIGHT_OPEN
+/// (implemented as isInnies() with the breakend order reversed)
+inline
+bool
+isOutties(
+    const bool isIdx1First,
+    const index_t idx1,
+    const index_t idx2)
+{
+    return isInnies((! isIdx1First),idx1,idx2);
+}
+
+}
+
+/// one end of a structural variant candidate: a genome interval, an orientation state and low-res evidence counts
+struct SVBreakend
+{
+    typedef SVBreakend self_t;
+
+    SVBreakend() :
+        state(SVBreakendState::UNKNOWN)
+    {}
+
+    bool
+    isIntersect(const self_t& rhs) const
+    {
+        if (state != rhs.state) return false; // breakends in different states never intersect
+        if (SVBreakendState::UNKNOWN == state) return true; // two UNKNOWN breakends intersect regardless of their intervals
+        return interval.isIntersect(rhs.interval);
+    }
+
+    bool
+    operator<(const self_t& rhs) const
+    {
+        if (state < rhs.state) return true; // order by state first, then by interval
+        if (state == rhs.state)
+        {
+            return (interval < rhs.interval);
+        }
+        return false;
+    }
+
+    bool
+    merge(
+        const SVBreakend& rhs,
+        const bool isExpandRegion)
+    {
+        if (! isIntersect(rhs)) return false; // only intersecting breakends are merged; false reports that nothing changed
+        if (isExpandRegion)
+        {
+            interval.range.merge_range(rhs.interval.range);
+        }
+        lowresEvidence.merge(rhs.lowresEvidence); // evidence counts are summed whether or not the region is expanded
+        return true;
+    }
+
+    void
+    clear()
+    {
+        interval.clear();
+        state=SVBreakendState::UNKNOWN;
+        lowresEvidence.clear();
+    }
+
+    unsigned
+    getPairCount() const
+    {
+        return lowresEvidence.getVal(SVEvidenceType::PAIR);
+    }
+
+    unsigned
+    getLocalPairCount() const
+    {
+        return lowresEvidence.getVal(SVEvidenceType::LOCAL_PAIR);
+    }
+
+    /// return true if there is pair evidence, but it is only local
+    bool
+    isLocalPairOnly() const
+    {
+        return ((getLocalPairCount() > 0) && (getPairCount() == 0));
+    }
+
+    /// return true if all evidence for this breakend is local
+    bool
+    isLocalOnly() const
+    {
+        return (getLocalPairCount() == lowresEvidence.getTotal()); // also true when there is no evidence at all (0 == 0)
+    }
+
+    unsigned
+    getAnyNonPairCount() const
+    {
+        using namespace SVEvidenceType;
+
+        unsigned sum(0);
+        for (int i(0); i< SVEvidenceType::SIZE; ++i)
+        {
+            if (i == PAIR) continue;
+            if (i == LOCAL_PAIR) continue;
+            if (i == UNKNOWN) continue; // UNKNOWN evidence is left out of the sum as well
+            sum += lowresEvidence.getVal(i);
+        }
+        return sum;
+    }
+
+    // include any evidence type which defines a two-region hypothesis
+    unsigned
+    getSpanningCount() const
+    {
+        using namespace SVEvidenceType;
+
+        return (lowresEvidence.getVal(PAIR) +
+                lowresEvidence.getVal(CIGAR) +
+                lowresEvidence.getVal(SPLIT_ALIGN));
+    }
+
+    pos_t
+    getLeftSideOfBkptAdjustment() const
+    {
+        using namespace SVBreakendState;
+        switch (state)
+        {
+        case RIGHT_OPEN :
+            return 0;
+        case LEFT_OPEN :
+            return -1; // LEFT_OPEN is the only state with a non-zero adjustment
+        default:
+            return 0;
+        }
+    }
+
+public:
+
+    // if ! isPrecise() the interval is the X% confidence interval of the SV breakend, the interface allows for
+    // various probability distributions to back this interval, but these must be accessed via
+    // SVCandidate:
+    //
+    // csaunders 07-2015: observation of what's here rather than a policy description (where is the design discussion for this?)
+    // if isPrecise(), the left end of the pos is the leftmost *mapped* base when the variant is fully left-shifted wrt this breakend.
+    // this means that the leftmost position concept is different for the right-open and left-open cases. for right-open, this is the base
+    // before the breakend, for left-open this is the base after.
+    //
+    GenomeInterval interval;
+    SVBreakendState::index_t state;
+
+    SVBreakendLowResEvidence lowresEvidence;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVBreakend& svb);
diff --git a/src/c++/lib/manta/SVCandidate.cpp b/src/c++/lib/manta/SVCandidate.cpp
new file mode 100644
index 0000000..54f1d45
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidate.cpp
@@ -0,0 +1,65 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/SVCandidate.hh"
+
+#include <iostream>
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVCandidate& svc)
+{
+    static const char indent('\t');
+    const bool isImprecise(svc.isImprecise());
+    os << "SVCandidate:\n";
+    os << indent << "isImprecise?: " << isImprecise << "\n";
+    os << indent << "fwReads: " << svc.fwReads << " ; rvReads: " << svc.rvReads << "\n";
+    os << indent << "index candidate:assemblyAlign:assemblySegment: " << svc.candidateIndex
+       << ":" << svc.assemblyAlignIndex << ":" << svc.assemblySegmentIndex << "\n";
+    // the alignment and insert sequence are only printed for precise candidates
+    if (! isImprecise)
+    {
+        os << "\tAlignment: " << svc.insertAlignment << "\n";
+        os << "\tBreakendInsertSeq: " << svc.insertSeq << "\n";
+    }
+    os << "\t" << svc.bp1 << "\n";
+    os << "\t" << svc.bp2 << "\n";
+    return os;
+}
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVObservation& svc)
+{
+    // print the base-class part through a reference; casting to SVCandidate by value would copy-construct it first
+    os << static_cast<const SVCandidate&>(svc);
+    os << "SVObservation etype: " << SVEvidenceType::label(svc.evtype) << " fragtype: " << FRAGSOURCE::label(svc.fragSource) << "\n";
+    return os;
+}
diff --git a/src/c++/lib/manta/SVCandidate.hh b/src/c++/lib/manta/SVCandidate.hh
new file mode 100644
index 0000000..d021706
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidate.hh
@@ -0,0 +1,270 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/align_path.hh"
+#include "manta/SVBreakend.hh"
+
+#include <cstdlib>
+
+#include <iosfwd>
+#include <string>
+
+/// a structural variant candidate: two breakends (bp1, bp2) plus insert sequence, assembly indices and read direction counts
+struct SVCandidate
+{
+#if 0
+    double
+    breakpointProb(pos_t x, pos_t y) const;
+#endif
+
+
+    /// if false, the breakend interval is at base-pair resolution
+    ///
+    /// false does not mean that the interval size is zero, a precise breakend interval range represents microhomology at the breakend site
+    bool
+    isImprecise() const
+    {
+        return _isImprecise;
+    }
+
+    bool
+    isIntersect(const SVCandidate& rhs) const
+    {
+        return ((bp1.isIntersect(rhs.bp1) && bp2.isIntersect(rhs.bp2)) || // breakends match in the same order
+                (bp1.isIntersect(rhs.bp2) && bp2.isIntersect(rhs.bp1)));  // breakends match in swapped order
+    }
+
+    bool
+    isIntersect1to1(const SVCandidate& rhs) const
+    {
+        return (bp1.isIntersect(rhs.bp1) && bp2.isIntersect(rhs.bp2)); // same-order match only
+    }
+
+    bool
+    merge(
+        const SVCandidate& rhs,
+        const bool isExpandRegion = true)
+    {
+        if (! isIntersect(rhs)) return false; // nothing is changed unless the candidates intersect
+
+        if (bp1.isIntersect(rhs.bp1))
+        {
+            bp1.merge(rhs.bp1, isExpandRegion);
+            bp2.merge(rhs.bp2, isExpandRegion);
+            fwReads += rhs.fwReads;
+            rvReads += rhs.rvReads;
+        }
+        else // breakends match in swapped order, so the read direction counts of rhs are swapped too
+        {
+            bp1.merge(rhs.bp2, isExpandRegion);
+            bp2.merge(rhs.bp1, isExpandRegion);
+            fwReads += rhs.rvReads;
+            rvReads += rhs.fwReads;
+        }
+
+        _isImprecise = (isImprecise() || rhs.isImprecise()); // the merged candidate is precise only if both inputs were
+
+        return true;
+    }
+
+#if 0
+    void
+    clear()
+    {
+        _isImprecise = true;
+        bp1.clear();
+        bp2.clear();
+        insertSeq.clear();
+        candidateIndex=0;
+        assemblyAlignIndex=0;
+        assemblySegmentIndex=0;
+        isUnknownSizeInsertion = false;
+        unknownSizeInsertionLeftSeq.clear();
+        unknownSizeInsertionRightSeq.clear();
+        fwReads = 0;
+        rvReads = 0;
+        isSingleJunctionFilter = false;
+    }
+#endif
+
+    void
+    setPrecise()
+    {
+        _isImprecise = false;
+    }
+
+    bool
+    isForward() const
+    {
+        return (fwReads > rvReads); // a tie counts as not forward
+    }
+
+    bool
+    isStranded() const
+    {
+        return ((std::max(fwReads, rvReads)+1) / (std::min(fwReads, rvReads)+1) >= 2); // integer ratio of the (count+1) values; NOTE(review): std::max/std::min need <algorithm>, which this header does not include directly
+    }
+
+    /// if 1 is added to the position of one breakend (within the homologous breakend range), then is 1 also added to the other breakend?
+    ///
+    /// if false then breakends move in opposite directions;
+    bool
+    isBreakendRangeSameShift() const
+    {
+        return (bp1.state != bp2.state);
+    }
+
+    int
+    centerSize() const
+    {
+        return std::abs(bp2.interval.range.center_pos() - bp1.interval.range.center_pos()); // absolute distance between the two interval centers
+    }
+
+    /// for precise SV report the full spanning count
+    /// for imprecise SV report spanning pairs only
+    ///
+    unsigned
+    getPostAssemblySpanningCount() const
+    {
+        if (isImprecise())
+        {
+            return bp1.getPairCount(); // both branches read their counts from bp1 only
+        }
+        else
+        {
+            return bp1.getSpanningCount();
+        }
+    }
+
+private:
+    bool _isImprecise = true; // candidates start out imprecise until setPrecise() is called
+
+public:
+
+    SVBreakend bp1;
+    SVBreakend bp2;
+
+    // this is either a micro-insertion in a large-scale SV, or the inserted sequence of an actual insertion
+    // in case bp1 and bp2 are on opposite strands (ie. an inversion) the insertSeq is oriented to the fwd strand for bp1
+    std::string insertSeq;
+
+    // for some small indels, the alignment becomes sufficiently complex that a CIGAR string provides better detail
+    // (this is provided for any small SV which is more complicated than a simple insert or deletion)
+    ALIGNPATH::path_t insertAlignment;
+
+    unsigned candidateIndex = 0; ///< low-res candidate index number, used to generate unique SV id
+    unsigned assemblyAlignIndex = 0; ///< high-res assembly index number of alignment, used to generate unique SV id
+    unsigned assemblySegmentIndex = 0; ///< high-res assembly index number of alignment segment, used to generate unique SV id
+
+    bool isUnknownSizeInsertion = false; ///< these insertions haven't been assembled all the way through
+
+    std::string unknownSizeInsertionLeftSeq; ///< for an incomplete insertion, this is the known left side of the insert sequence
+    std::string unknownSizeInsertionRightSeq; ///< for an incomplete insertion, this is the known right side of the insert sequence
+
+    unsigned fwReads = 0; ///< Number of reads (pairs) supporting a direction from bp1 to bp2 (used for stranded RNA data)
+    unsigned rvReads = 0; ///< Number of reads (pairs) directed from bp2 to bp1
+    /// filter out this sv candidate unless it's rescued by a multi-junction event:
+    bool isSingleJunctionFilter = false;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVCandidate& svc);
+
+
+/// which read(s) of a fragment an SV observation was derived from
+namespace FRAGSOURCE
+{
+enum index_t
+{
+    UNKNOWN,
+    READ1,
+    READ2,
+    PAIR
+};
+
+/// \brief Get the lower-case display label for a fragment source
+///
+/// \param i fragment source value
+/// \return "unknown", "read1", "read2" or "pair"; the empty string for
+///         any value which is not a declared enumerator
+inline
+const char*
+label(const index_t i)
+{
+    if (i == UNKNOWN) return "unknown";
+    if (i == READ1) return "read1";
+    if (i == READ2) return "read2";
+    if (i == PAIR) return "pair";
+
+    // not a declared enumerator
+    return "";
+}
+}
+
+/// an SV candidate extracted from a single piece of evidence, tagged with the type and read source of that evidence
+///
+struct SVObservation : public SVCandidate
+{
+    SVObservation() :
+        SVCandidate(),
+        evtype(SVEvidenceType::UNKNOWN),
+        fragSource(FRAGSOURCE::UNKNOWN)
+    {}
+
+#if 0
+    void
+    clear()
+    {
+        evtype = SVEvidenceType::UNKNOWN;
+        fragSource = FRAGSOURCE::UNKNOWN;
+        SVCandidate::clear();
+    }
+#endif
+
+    /// does the evidence for this SV observation come from a single read (e.g. a CIGAR read alignment) or
+    /// from both reads (e.g. an anomalous read pair)
+    bool
+    isSingleReadSource() const
+    {
+        // the UNKNOWN and PAIR sources both yield false
+        return ((FRAGSOURCE::READ1 == fragSource) || (FRAGSOURCE::READ2 == fragSource));
+    }
+
+    bool
+    isRead1Source() const
+    {
+        // true only when the evidence source is read 1
+        return (FRAGSOURCE::READ1 == fragSource);
+    }
+
+    SVEvidenceType::index_t evtype;   // kind of evidence behind this observation, UNKNOWN until set
+    FRAGSOURCE::index_t fragSource;   // read(s) the evidence was taken from, UNKNOWN until set
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVObservation& svc);
+
diff --git a/src/c++/lib/manta/SVCandidateAssembler.cpp b/src/c++/lib/manta/SVCandidateAssembler.cpp
new file mode 100644
index 0000000..68b6fdc
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateAssembler.cpp
@@ -0,0 +1,810 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+/// \author Chris Saunders
+///
+
+
+#include "blt_util/CircularCounter.hh"
+#include "blt_util/log.hh"
+#include "blt_util/seq_util.hh"
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/SimpleAlignment_bam_util.hh"
+#include "manta/RemoteMateReadUtil.hh"
+#include "manta/ShadowReadFinder.hh"
+#include "manta/SVCandidateAssembler.hh"
+#include "manta/SVLocusScannerSemiAligned.hh"
+
+#include <iostream>
+
+//#define DEBUG_REMOTES
+//#define DEBUG_ASBL
+
+
+/// Compute the background rate of remote recovery candidate reads for one sample
+///
+/// The rate is the sample's remoteRecoveryCandidates evidence count divided by its input total,
+/// with a fixed pseudocount added to each of the two terms.
+///
+/// \param[in] counts read counts for all samples
+/// \param[in] sampleIndex index of the sample to look up in \p counts
+static
+double
+getRemoteRate(
+    const AllCounts& counts,
+    const unsigned sampleIndex)
+{
+    static const double pseudoRemote(100.);
+    static const double pseudoTotal(10000.);
+
+    const SampleReadInputCounts& sampleInput(counts.getSampleCounts(sampleIndex).input);
+
+    const double remoteCount(sampleInput.evidenceCount.remoteRecoveryCandidates+pseudoRemote);
+    const double totalCount(sampleInput.total()+pseudoTotal);
+    return (remoteCount/totalCount);
+}
+
+
+
+/// Set up the depth filters, the read scanner, one bam stream per input alignment file and
+/// the per-sample background remote read rates
+SVCandidateAssembler::
+SVCandidateAssembler(
+    const ReadScannerOptions& scanOpt,
+    const AssemblerOptions& assembleOpt,
+    const AlignmentFileOptions& alignFileOpt,
+    const std::string& statsFilename,
+    const std::string& chromDepthFilename,
+    const bam_header_info& bamHeader,
+    const AllCounts& counts,
+    const bool isRNA,
+    TimeTracker& remoteTime) :
+    _scanOpt(scanOpt),
+    _assembleOpt(assembleOpt),
+    _isAlignmentTumor(alignFileOpt.isAlignmentTumor),
+    _dFilter(chromDepthFilename, scanOpt.maxDepthFactor, bamHeader),
+    _dFilterRemoteReads(chromDepthFilename, scanOpt.maxDepthFactorRemoteReads, bamHeader),
+    _readScanner(_scanOpt, statsFilename, alignFileOpt.alignmentFilename, isRNA),
+    _remoteTime(remoteTime)
+{
+    // open one regionless bam_streamer per alignment file, in input file order:
+    for (const std::string& alignmentFilename : alignFileOpt.alignmentFilename)
+    {
+        // hold the new stream in a named pointer to avoid creating shared_ptr temporaries:
+        streamPtr stream(new bam_streamer(alignmentFilename.c_str()));
+        _bamStreams.push_back(stream);
+    }
+
+    // one background remote read rate per bam stream:
+    const unsigned sampleCount(_bamStreams.size());
+    _sampleBackgroundRemoteRate.reserve(sampleCount);
+    for (unsigned sampleIndex(0); sampleIndex<sampleCount; ++sampleIndex)
+    {
+        _sampleBackgroundRemoteRate.push_back(getRemoteRate(counts,sampleIndex));
+    }
+}
+
+
+
+/// Add one read to an approximate per-position depth estimate
+///
+/// The cigar string is not read, instead the read is assumed to be a perfect match of size
+/// read_size beginning at its alignment position. Only the portion of this assumed span which
+/// falls inside of the tracked region is counted.
+///
+/// \param[in] bamRead read to add to the estimate
+/// \param[in] beginPos reference position corresponding to depth[0]
+/// \param[in,out] depth per-position depth counts for the region starting at \p beginPos
+static
+void
+addReadToDepthEst(
+    const bam_record& bamRead,
+    const pos_t beginPos,
+    std::vector<unsigned>& depth)
+{
+    const pos_t regionEndPos(beginPos+depth.size());
+    const pos_t readBeginPos(bamRead.pos()-1);
+    const pos_t readSize(bamRead.read_size());
+    const pos_t readEndPos(readBeginPos+readSize);
+
+    // clip the assumed reference span of the read to the tracked region:
+    const pos_t firstPos(std::max(readBeginPos,beginPos));
+    const pos_t lastPos(std::min(readEndPos,regionEndPos));
+
+    for (pos_t refPos(firstPos); refPos<lastPos; ++refPos)
+    {
+        const pos_t depthIndex(refPos-beginPos);
+        assert(depthIndex>=0);
+
+        depth[depthIndex]++;
+    }
+}
+
+
+
+/// Insert a read into the assembly read pool after masking low quality basecalls
+///
+/// Reads are keyed on query name, read number and bam index. A read whose key is already
+/// present in \p readIndex is skipped. Otherwise every base with a quality below \p minQval is
+/// replaced with 'N', and the sequence is reverse-complemented if \p isReversed is true.
+///
+/// \param[in,out] readIndex map from read key to the index of the read in \p reads
+/// \param[in,out] reads assembly read pool
+/// \return true if the read was added to \p reads, false if its key was already present
+static
+bool
+insertAssemblyRead(
+    const uint8_t minQval,
+    const std::string& bamIndexStr,
+    const bam_record& bamRead,
+    const bool isReversed,
+    SVCandidateAssembler::ReadIndexType& readIndex,
+    AssemblyReadInput& reads)
+{
+    const char readNoTag(bamRead.is_second() ? '2' : '1');
+    const std::string readKey = std::string(bamRead.qname()) + "_" + readNoTag + "_" + bamIndexStr;
+
+    if (readIndex.count(readKey) != 0)
+    {
+        // this can be a normal case when for instance, spanning breakends overlap by a small amount
+#ifdef DEBUG_ASBL
+        log_os << __FUNCTION__ << ": WARNING: SmallAssembler read name collision : " << readKey << "\n";
+#endif
+        return false;
+    }
+
+    // register the slot which the new read is about to take in the read pool:
+    readIndex.insert(std::make_pair(readKey,reads.size()));
+    reads.push_back(bamRead.get_bam_read().get_string());
+
+    std::string& readSeq(reads.back());
+    const uint8_t* baseQuals(bamRead.qual());
+    const unsigned readLength(readSeq.size());
+
+    // mask all basecalls below the minimum quality:
+    unsigned baseIndex(0);
+    while (baseIndex<readLength)
+    {
+        if (baseQuals[baseIndex] < minQval) readSeq[baseIndex] = 'N';
+        ++baseIndex;
+    }
+
+    if (isReversed) reverseCompStr(readSeq);
+    return true;
+}
+
+
+
+/// retrieve remote reads from a list of target loci in the bam
+///
+/// The remotes are sorted and grouped into bam regions, each region is then scanned once and every
+/// bam record in it is compared against the region's not-yet-found remotes by read number and qname.
+/// Matching records with a mapping quality of zero are inserted into the assembly read pool and
+/// stored in the remote read cache.
+///
+/// \param[in] assembleOpt provides the minimum basecall quality used when inserting reads
+/// \param[in] maxNumReads scanning of a region stops once \p reads holds this many reads
+/// \param[in] isLocusReversed starting value for the reversal state of each recovered read
+/// \param[in] bamIndexStr bam index label used to build assembly read keys
+/// \param[in,out] bamStream stream which is repositioned to each remote region in turn
+/// \param[in,out] bamRemotes remote read targets, sorted in place by this function
+/// \param[in,out] readIndex map from assembly read key to index in \p reads
+/// \param[in,out] reads assembly read pool
+/// \param[out] remoteReadsCache receives each recovered read, keyed on qname
+static
+void
+recoverRemoteReads(
+    const AssemblerOptions& assembleOpt,
+    const unsigned maxNumReads,
+    const bool isLocusReversed,
+    const std::string& bamIndexStr,
+    bam_streamer& bamStream,
+    std::vector<RemoteReadInfo>& bamRemotes,
+    SVCandidateAssembler::ReadIndexType& readIndex,
+    AssemblyReadInput& reads,
+    RemoteReadCache& remoteReadsCache)
+{
+    // figure out what we can handle in a single region query:
+    std::sort(bamRemotes.begin(),bamRemotes.end());
+
+    typedef std::pair<GenomeInterval, std::vector<RemoteReadInfo> > BamRegionInfo_t;
+    std::vector<BamRegionInfo_t> bamRegions;
+
+#ifdef DEBUG_REMOTES
+    log_os << __FUNCTION__ << ": totalRemotes: " << bamRemotes.size() << "\n";
+#endif
+
+    // tid and pos of the previously visited remote, -1 until the first remote has been seen:
+    int last_tid=-1;
+    int last_pos=-1;
+    for (const RemoteReadInfo& remote : bamRemotes)
+    {
+        assert(remote.tid >= 0);
+
+        // a remote on the same contig which starts within one read size of the previous remote
+        // is merged into the current region, otherwise it starts a new region:
+        if ((last_tid == remote.tid) && (last_pos+remote.readSize >= remote.pos))
+        {
+            assert(! bamRegions.empty());
+            GenomeInterval& interval(bamRegions.back().first);
+            interval.range.set_end_pos(remote.pos);
+
+            std::vector<RemoteReadInfo>& remotes(bamRegions.back().second);
+            remotes.push_back(remote);
+        }
+        else
+        {
+            std::vector<RemoteReadInfo> remotes;
+            remotes.push_back(remote);
+            bamRegions.push_back(std::make_pair(GenomeInterval(remote.tid,remote.pos,remote.pos),remotes));
+        }
+
+        last_tid=remote.tid;
+        last_pos=remote.pos;
+    }
+
+#ifdef DEBUG_REMOTES
+    log_os << __FUNCTION__ << ": totalregions: " << bamRegions.size() << "\n";
+#endif
+
+    for (BamRegionInfo_t& bregion : bamRegions)
+    {
+        const GenomeInterval& interval(bregion.first);
+        std::vector<RemoteReadInfo>& remotes(bregion.second);
+
+#ifdef DEBUG_REMOTES
+        log_os << __FUNCTION__ << ": begion interval " << interval << "\n";
+        for (const RemoteReadInfo& remote : remotes)
+        {
+            log_os << " remote: " << remote.tid << " " << remote.pos << "\n";
+        }
+
+        unsigned readCount(0);
+#endif
+
+        // set bam stream to new search interval:
+        bamStream.set_new_region(
+            interval.tid,
+            interval.range.begin_pos(),
+            interval.range.end_pos()+1);
+
+        while (bamStream.next())
+        {
+            // stop scanning this region once the assembly read pool is full:
+            if (reads.size() >= maxNumReads)
+            {
+#ifdef DEBUG_ASBL
+                log_os << __FUNCTION__ << ": WARNING: assembly read buffer full, skipping further input\n";
+#endif
+                break;
+            }
+
+            const bam_record& bamRead(*(bamStream.get_record_ptr()));
+
+            // we've gone past the last case:
+            if (bamRead.pos() > (remotes.back().pos+1)) break;
+
+            if (bamRead.isNonStrictSupplement()) continue;
+
+            // compare this bam record against each remote in the region which is not found yet:
+            for (RemoteReadInfo& remote : remotes)
+            {
+#ifdef DEBUG_REMOTES
+                readCount++;
+                if ((readCount%1000000) == 0) log_os << " counts: " << readCount << "\n";
+#endif
+                if (remote.isFound) continue;
+                if (bamRead.read_no() != remote.readNo) continue;
+                if (strcmp(bamRead.qname(),remote.qname.c_str()) != 0) continue;
+
+#ifdef DEBUG_REMOTES
+                log_os << __FUNCTION__ << ": found remote: " << remote.tid << " " << remote.pos << "\n";
+#endif
+                remote.isFound = true;
+
+                // the remote is marked as found, but only used if its mapping quality is zero:
+                if (bamRead.map_qual() != 0) break;
+
+                // determine if we need to reverse:
+                // (the reversal state is flipped when the read and its mate are on the same strand)
+                bool isReversed(isLocusReversed);
+                if (bamRead.is_fwd_strand() == bamRead.is_mate_fwd_strand())
+                {
+                    isReversed = (! isReversed);
+                }
+
+                const bool isInserted = insertAssemblyRead(assembleOpt.minQval, bamIndexStr, bamRead, isReversed, readIndex, reads);
+                if (! isInserted) break;
+
+                /// add to the remote read cache used during PE scoring:
+                remoteReadsCache[remote.qname] = RemoteReadPayload(bamRead.read_no(), reads.back());
+
+                remote.isUsed = true;
+                break;
+            }
+        }
+#ifdef DEBUG_REMOTES
+        log_os << __FUNCTION__ << ": total reads traversed in region: " << readCount << "\n";
+#endif
+    }
+}
+
+
+
+#ifdef REMOTE_NOISE_RATE
+/// Test whether the peak remote read rate observed in a region stands out from the sample background
+///
+/// \param[in] rate windowed counter of remote read observations
+/// \param[in] background background remote read rate to compare against
+/// \return true if at least 20 observations and a peak count of at least 3 are available, and the
+///         peak rate is more than 3 times the background rate
+static
+bool
+isSampleSignal(
+    const CircularCounter& rate,
+    const double background)
+{
+    // require a minimum amount of data and a minimum peak count before testing the rate:
+    if (rate.dataSize() < 20) return false;
+    if (rate.maxCount() < 3) return false;
+
+    // the rate has to be held in a double, storing it in a bool would collapse every non-zero
+    // rate to 1 before it is compared to the background:
+    const double regionRate(static_cast<double>(rate.maxCount())/rate.dataSize());
+
+    static const double fudge(3.0);
+    return (regionRate > (fudge*background));
+}
+#endif
+
+
+
+/// Collect assembly candidate reads for one breakend from all input alignment files
+///
+/// Each bam stream is scanned over a search range built from the breakend interval. After filtering,
+/// reads are added to the assembly pool if they contain a sufficiently large indel, have a poorly
+/// aligned leading/trailing end, or are shadow reads. If isSearchRemoteInsertionReads is true,
+/// remote mate candidates are collected during the scan and recovered afterwards, unless the
+/// remote read depth filter was triggered.
+///
+/// \param[in] bp breakend to collect reads for
+/// \param[in] isLocusReversed starting reversal state for all reads collected from this breakend
+/// \param[in] refSeq reference sequence used to find reads with poorly aligned ends
+/// \param[in] isSearchRemoteInsertionReads if true, also collect and recover remote mate reads
+/// \param[out] remoteReadsCache receives any recovered remote reads
+/// \param[in,out] readIndex map from assembly read key to index in \p reads
+/// \param[in,out] reads assembly read pool
+void
+SVCandidateAssembler::
+getBreakendReads(
+    const SVBreakend& bp,
+    const bool isLocusReversed,
+    const reference_contig_segment& refSeq,
+    const bool isSearchRemoteInsertionReads,
+    RemoteReadCache& remoteReadsCache,
+    ReadIndexType& readIndex,
+    AssemblyReadInput& reads) const
+{
+    // get search range:
+    known_pos_range2 searchRange;
+
+    // flanking regions specify areas where remote reads and shadows must have the right orientation
+    known_pos_range2 leftFlank,rightFlank;
+    {
+        // ideally this should be dependent on the insert size dist
+        // TODO: follow-up on trial value of 200 in a separate branch/build
+        // TODO: there should be a core search range and an expanded range for shadow/MAPQ0 only, shadow ranges should be left/right constrained to be consistent with center
+        static const size_t minIntervalSize(400);
+        if (bp.interval.range.size() >= minIntervalSize)
+        {
+            searchRange = bp.interval.range;
+        }
+        else
+        {
+            const size_t missing = minIntervalSize - bp.interval.range.size();
+            assert(missing > 0);
+
+            // the expansion is computed in signed positions: with an unsigned wobble the subtraction
+            // below wraps around for breakends close to the contig start, so that the clamp to zero
+            // never takes effect
+            const pos_t wobble(static_cast<pos_t>(missing/2));
+            const pos_t intervalBeginPos(bp.interval.range.begin_pos());
+            const pos_t intervalEndPos(bp.interval.range.end_pos());
+            // FIXME : not sure what happens if (end_pos + wobble) > chromosome size?
+            static const pos_t zero(0);
+            searchRange.set_range(std::max<pos_t>((intervalBeginPos-wobble),zero),(intervalEndPos+wobble));
+        }
+        leftFlank.set_range(searchRange.begin_pos(), bp.interval.range.begin_pos());
+        rightFlank.set_range(bp.interval.range.end_pos(), searchRange.end_pos());
+    }
+
+#ifdef DEBUG_ASBL
+    static const std::string logtag("SVLocusAssembler::getBreakendReads: ");
+    log_os << logtag << "searchRange " << searchRange << "\n";
+#endif
+
+    // for assembler reads, look for indels at report size or somewhat smaller
+    const unsigned minAssembleIndelSize(_scanOpt.minCandidateVariantSize/2);
+
+    // depending on breakend type we may only be looking for candidates in one direction:
+    bool isSearchForRightOpen(true);
+    bool isSearchForLeftOpen(true);
+    if (SVBreakendState::RIGHT_OPEN == bp.state)
+    {
+        isSearchForLeftOpen = false;
+    }
+
+    if (SVBreakendState::LEFT_OPEN == bp.state)
+    {
+        isSearchForRightOpen = false;
+    }
+
+    const bool isMaxDepth(_dFilter.isMaxDepthFilter());
+    float maxDepth(0);
+    float maxDepthRemoteReads(0);
+    if (isMaxDepth)
+    {
+        maxDepth = _dFilter.maxDepth(bp.interval.tid);
+        maxDepthRemoteReads = _dFilterRemoteReads.maxDepth(bp.interval.tid);
+    }
+    const pos_t searchBeginPos(searchRange.begin_pos());
+    const pos_t searchEndPos(searchRange.end_pos());
+
+    // approximate depth over the search range, accumulated from the non-tumor samples only:
+    std::vector<unsigned> normalDepthBuffer(searchRange.size(),0);
+
+    bool isFirstTumor(false);
+
+    static const unsigned maxNumReads(1000);
+
+    const unsigned bamCount(_bamStreams.size());
+    std::vector<std::vector<RemoteReadInfo> > remoteReads(bamCount);
+
+    bool isMaxDepthRemoteReadsTriggered(false);
+#ifdef FWDREV_CHECK
+    /// sanity check that remote and shadow reads suggest an insertion pattern before doing an expensive remote recovery:
+    std::vector<int> fwdSemiReadPos;
+    std::vector<int> revSemiReadPos;
+#endif
+
+#ifdef REMOTE_NOISE_RATE
+    static const unsigned countWindow(200);
+    CircularCounter normalRemoteRate(countWindow);
+    CircularCounter tumorRemoteRate(countWindow);
+#endif
+
+    for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+    {
+        const bool isTumor(_isAlignmentTumor[bamIndex]);
+
+        // assert that the expected sample order is all normal samples first,
+        // followed by all tumor samples
+        if (isTumor) isFirstTumor=true;
+        assert((! isFirstTumor) || isTumor);
+
+        const std::string bamIndexStr(boost::lexical_cast<std::string>(bamIndex));
+
+        bam_streamer& bamStream(*_bamStreams[bamIndex]);
+
+        // set bam stream to new search interval:
+        bamStream.set_new_region(bp.interval.tid, searchBeginPos, searchEndPos);
+
+        ShadowReadFinder shadow(_scanOpt.minSingletonMapqCandidates, isSearchForLeftOpen, isSearchForRightOpen);
+
+#ifdef DEBUG_ASBL
+        unsigned indelCount(0);
+        unsigned semiAlignedCount(0);
+        unsigned shadowCount(0);
+#endif
+
+#ifdef REMOTE_NOISE_RATE
+        CircularCounter& remoteRate(isTumor ? tumorRemoteRate : normalRemoteRate);
+#endif
+
+        while (bamStream.next())
+        {
+            // stop scanning this sample once the assembly read pool is full:
+            if (reads.size() >= maxNumReads)
+            {
+#ifdef DEBUG_ASBL
+                log_os << logtag << "WARNING: assembly read buffer full, skipping further input\n";
+#endif
+                break;
+            }
+
+            const bam_record& bamRead(*(bamStream.get_record_ptr()));
+
+            const pos_t refPos(bamRead.pos()-1);
+            if (refPos >= searchEndPos) break;
+
+            // Filter reads which won't be used in assembly:
+            //
+            if (isMaxDepth)
+            {
+                if (! isTumor)
+                {
+                    // depth estimation relies on a simple filtration criteria to stay in sync with the chromosome mean
+                    // depth estimates:
+                    if (! bamRead.is_unmapped())
+                    {
+                        addReadToDepthEst(bamRead, searchBeginPos, normalDepthBuffer);
+                    }
+                }
+            }
+
+            // don't filter out MAPQ0 because the split reads tend to have reduced mapping scores:
+            if (SVLocusScanner::isReadFilteredCore(bamRead)) continue;
+
+            if (bamRead.isNonStrictSupplement()) continue;
+
+            if (isMaxDepth)
+            {
+                assert(refPos<searchEndPos);
+                const pos_t depthOffset(refPos - searchBeginPos);
+
+                // exceeding the remote read depth limit anywhere disables remote recovery for this breakend:
+                if ((depthOffset >= 0) && (normalDepthBuffer[depthOffset] > maxDepthRemoteReads))
+                {
+                    isMaxDepthRemoteReadsTriggered=true;
+                }
+
+                // skip reads which start at a position exceeding the max depth limit:
+                if ((depthOffset >= 0) && (normalDepthBuffer[depthOffset] > maxDepth))
+                {
+                    continue;
+                }
+            }
+
+            // Finished filtering reads, now test reads for assm evidence:
+            //
+            //
+            // TODO: if a dna fragment is shorter than the read length it can include adaptor sequence, there are no protections
+            // in here preventing this adaptor sequence from entering the assembly pool (CheckSemiAligned will reject any such pair
+            // as assembly evidence, but another test might pull the read in -- if this happens any soft-clipped read segments are
+            // dragged in as well.
+            //
+
+
+#ifdef REMOTE_NOISE_RATE
+            remoteRate.push(false);
+#endif
+
+            SimpleAlignment bamAlign(getAlignment(bamRead));
+
+            // check whether we do a separate search for the mate read
+            if (isSearchRemoteInsertionReads)
+            {
+                if (isMateInsertionEvidenceCandidate(bamRead, _scanOpt.minMapq))
+                {
+                    // reads intersecting a flank are not searched in the direction of that flank:
+                    const known_pos_range2 bamRange(matchifyEdgeSoftClipRefRange(bamAlign));
+                    const bool isSearchForLeftOpenMate(isSearchForLeftOpen && (! leftFlank.is_range_intersect(bamRange)));
+                    const bool isSearchForRightOpenMate(isSearchForRightOpen && (! rightFlank.is_range_intersect(bamRange)));
+                    if (isMateInsertionEvidenceCandidate2(bamRead, isSearchForLeftOpenMate, isSearchForRightOpenMate))
+                    {
+#ifdef DEBUG_ASBL
+                        log_os << logtag << "Adding remote bamrec. idx: " << bamIndex << " rec: " << bamRead << '\n'
+                               << "\tmapq: " << bamRead.map_qual() << '\n'
+                               << "\tread: " << bamRead.get_bam_read() << '\n';
+#endif
+
+                        remoteReads[bamIndex].emplace_back(bamRead);
+#ifdef REMOTE_NOISE_RATE
+                        remoteRate.replace(true);
+#endif
+                    }
+
+#ifdef FWDREV_CHECK
+                    if (bamRead.is_fwd_strand())
+                    {
+                        fwdSemiReadPos.push_back(bamRead.pos()-1);
+                    }
+                    else
+                    {
+                        revSemiReadPos.push_back(bamRead.pos()-1);
+                    }
+#endif
+                }
+            }
+
+
+            // check for any indels in read:
+            bool isIndelKeeper(false);
+            if (! bamRead.is_unmapped())
+            {
+                using namespace ALIGNPATH;
+                for (const path_segment& ps : bamAlign.path)
+                {
+                    if (is_segment_type_indel(ps.type))
+                    {
+                        // NOTE(review): the unconditional break means only the first indel segment of the
+                        // alignment is tested against the size threshold -- confirm this is intended
+                        if (ps.length>=minAssembleIndelSize) isIndelKeeper = true;
+                        break;
+                    }
+                }
+            }
+
+            // this test covered semi-aligned and soft-clip and split reads together
+            bool isSemiAlignedKeeper(false);
+            if (! bamRead.is_unmapped())
+            {
+                static const unsigned minMismatchLen(4);
+
+                unsigned leadingMismatchLen(0);
+                unsigned trailingMismatchLen(0);
+                getSVBreakendCandidateSemiAlignedSimple(bamRead, bamAlign, refSeq, _readScanner.isUseOverlappingPairs(), leadingMismatchLen, trailingMismatchLen);
+
+                if (isSearchForRightOpen)
+                {
+                    if (trailingMismatchLen >= minMismatchLen) isSemiAlignedKeeper = true;
+                }
+
+                if (isSearchForLeftOpen)
+                {
+                    if (leadingMismatchLen >= minMismatchLen) isSemiAlignedKeeper = true;
+                }
+
+#if 0
+                if (isSemiAligned(bamRead,ref,_scanOpt.minSemiAlignedScoreCandidates))
+                {
+                    isSemiAlignedKeeper = true;
+                }
+#endif
+            }
+
+            const bool isShadowKeeper(shadow.check(bamRead));
+
+#if 0
+            bool isShadowKeeper(false);
+            if (shadow.isShadowAnchor(bamRead))
+            {
+                const known_pos_range2 bamRange(matchifyEdgeSoftClipRefRange(bamAlign));
+                const bool isSearchForLeftOpenShadow(isSearchForLeftOpen && (! leftFlank.is_range_intersect(bamRange)));
+                const bool isSearchForRightOpenShadow(isSearchForRightOpen && (! rightFlank.is_range_intersect(bamRange)));
+                if (shadow.isShadowAnchor(bamRead,isSearchForLeftOpenShadow,isSearchForRightOpenShadow))
+                {
+                    shadow.setAnchor(bamRead);
+                }
+            }
+            else
+            {
+                isShadowKeeper = shadow.isShadow(bamRead);
+            }
+#endif
+
+#ifdef FWDREV_CHECK
+            if (isShadowKeeper)
+            {
+                if (bamRead.is_mate_fwd_strand())
+                {
+                    fwdSemiReadPos.push_back(bamRead.mate_pos()-1);
+                }
+                else
+                {
+                    revSemiReadPos.push_back(bamRead.mate_pos()-1);
+                }
+            }
+#endif
+
+            // the read is only used if at least one of the evidence tests above has passed:
+            if (! (isIndelKeeper
+                   || isSemiAlignedKeeper
+                   || isShadowKeeper
+                  )) continue;
+
+#ifdef DEBUG_ASBL
+            if (isIndelKeeper) ++indelCount;
+            if (isSemiAlignedKeeper) ++semiAlignedCount;
+            if (isShadowKeeper) ++shadowCount;
+
+            log_os << logtag << "Adding bamrec. idx: " << bamIndex << " rec: " << bamRead << '\n'
+                   << "\tmapq: " << bamRead.map_qual() << '\n'
+                   << "\tread: " << bamRead.get_bam_read() << '\n';
+            log_os << "isIndelKeeper: " << isIndelKeeper
+                   << " isSemiAlignedKeeper: " << isSemiAlignedKeeper
+                   << " isShadowKeeper: " << isShadowKeeper
+                   << '\n';
+#endif
+
+            bool isReversed(isLocusReversed);
+            // if shadow read, determine if we need to reverse:
+            if (isShadowKeeper)
+            {
+                if (bamRead.is_mate_fwd_strand())
+                {
+                    isReversed = (! isReversed);
+                }
+            }
+
+            insertAssemblyRead(getAssembleOpt().minQval, bamIndexStr, bamRead, isReversed, readIndex, reads);
+        }
+
+#ifdef DEBUG_ASBL
+        log_os << logtag << "bam " << bamIndex
+               << " indel: " << indelCount
+               << " semi-aligned " << semiAlignedCount
+               << " shadow " << shadowCount
+               << '\n';
+#endif
+    }
+
+
+    // sanity check the remote reads to see if we're going to recover them:
+    bool isRecoverRemotes(!isMaxDepthRemoteReadsTriggered);
+
+#ifdef REMOTE_NOISE_RATE
+    // check if peak rate for both samples is not above expectation
+    {
+        const bool isNormalSignal(isSampleSignal(normalRemoteRate,_normalBackgroundRemoteRate));
+        const bool isTumorSignal(isSampleSignal(tumorRemoteRate,_tumorBackgroundRemoteRate));
+        if (! (isNormalSignal || isTumorSignal))
+        {
+            isRecoverRemotes = false;
+        }
+    }
+#endif
+
+
+#ifdef FWDREV_CHECK
+    if (isRecoverRemotes)
+    {
+#ifdef DEBUG_REMOTES
+        for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+        {
+            unsigned fwdStrandRemotes(0);
+            const std::vector<RemoteReadInfo>& bamRemotes(remoteReads[bamIndex]);
+            for (const RemoteReadInfo& remote : bamRemotes)
+            {
+                if (remote.isLocalFwd)
+                {
+                    fwdStrandRemotes++;
+                }
+            }
+            log_os << __FUNCTION__ << ": remotes for bamIndex " << bamIndex << " total: " << bamRemotes.size() << " fwd: " << fwdStrandRemotes << "\n";
+        }
+#endif
+
+        // get a hack median and IQ for the remotes:
+        int fwdMedian(0);
+        int fwdRange(0);
+        if (! fwdSemiReadPos.empty())
+        {
+            std::sort(fwdSemiReadPos.begin(),fwdSemiReadPos.end());
+            fwdMedian=(fwdSemiReadPos[fwdSemiReadPos.size()/2]);
+            fwdRange=(fwdSemiReadPos[(fwdSemiReadPos.size()*3)/4]-fwdSemiReadPos[(fwdSemiReadPos.size()*1)/4]);
+        }
+
+        int revMedian(0);
+        int revRange(0);
+        if (! revSemiReadPos.empty())
+        {
+            std::sort(revSemiReadPos.begin(),revSemiReadPos.end());
+            revMedian=(revSemiReadPos[revSemiReadPos.size()/2]);
+            revRange=(revSemiReadPos[(revSemiReadPos.size()*3)/4]-revSemiReadPos[(revSemiReadPos.size()*1)/4]);
+        }
+
+        if ((fwdSemiReadPos.size() <= 2) || (revSemiReadPos.size() <= 2))
+        {
+            isRecoverRemotes=false;
+        }
+        else
+        {
+            const int diff(revMedian-fwdMedian);
+            if ((diff >= 2000) || (diff < 0))
+            {
+                isRecoverRemotes=false;
+            }
+            else if ((fwdRange >= 400) || (revRange >= 400))
+            {
+                isRecoverRemotes=false;
+            }
+        }
+    }
+#endif
+
+    /// recover any remote reads:
+#ifdef DEBUG_REMOTES
+    log_os << __FUNCTION__ << ": isRecoverRemotes: " << isRecoverRemotes << "\n";
+#endif
+
+    if (isRecoverRemotes)
+    {
+        // time spent on remote read recovery is tracked by _remoteTime for the lifetime of this scope:
+        const TimeScoper remoteTimeScope(_remoteTime);
+        for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex)
+        {
+#ifdef DEBUG_REMOTES
+            log_os << __FUNCTION__ << ": starting remotes for bamindex: " << bamIndex << "\n";
+#endif
+            const std::string bamIndexStr(boost::lexical_cast<std::string>(bamIndex));
+
+            bam_streamer& bamStream(*_bamStreams[bamIndex]);
+
+            std::vector<RemoteReadInfo>& bamRemotes(remoteReads[bamIndex]);
+            recoverRemoteReads(
+                getAssembleOpt(),
+                maxNumReads, isLocusReversed, bamIndexStr, bamStream,
+                bamRemotes, readIndex, reads, remoteReadsCache);
+        }
+    }
+}
+
+
+
+/// Collect the assembly candidate reads of a single breakend and run the assembler on them
+///
+/// \param[in,out] remoteReads cache passed through to the breakend read collection step
+/// \param[out] as assembly result
+void
+SVCandidateAssembler::
+assembleSingleSVBreakend(
+    const SVBreakend& bp,
+    const reference_contig_segment& refSeq,
+    const bool isSearchRemoteInsertionReads,
+    RemoteReadCache& remoteReads,
+    Assembly& as) const
+{
+    // a single breakend locus is always collected in unreversed orientation:
+    static const bool isBpReversed(false);
+
+    AssemblyReadInput reads;
+    ReadIndexType readIndex;
+    getBreakendReads(bp, isBpReversed, refSeq, isSearchRemoteInsertionReads, remoteReads, readIndex, reads);
+
+    AssemblyReadOutput readInfo;
+#ifdef ITERATIVE_ASSEMBLER
+    runIterativeAssembler(getAssembleOpt(), reads, readInfo, as);
+#else
+    runSmallAssembler(getAssembleOpt(), reads, readInfo, as);
+#endif
+
+}
+
+
+
+/// Collect the assembly candidate reads of two breakends into one shared read pool and run the
+/// assembler on the combined pool
+///
+/// Remote insertion read search is disabled for both breakends, and the remote read cache filled
+/// during read collection is local to this function.
+///
+/// \param[in] isBp1Reversed reversal state passed to the read collection of bp1
+/// \param[in] isBp2Reversed reversal state passed to the read collection of bp2
+/// \param[out] as assembly result
+void
+SVCandidateAssembler::
+assembleSVBreakends(
+    const SVBreakend& bp1,
+    const SVBreakend& bp2,
+    const bool isBp1Reversed,
+    const bool isBp2Reversed,
+    const reference_contig_segment& refSeq1,
+    const reference_contig_segment& refSeq2,
+    Assembly& as) const
+{
+    static const bool isSearchRemoteInsertionReads(false);
+    RemoteReadCache remoteReads;
+    ReadIndexType readIndex;
+    AssemblyReadInput reads;
+    // NOTE(review): readRev is filled with one reversal flag per collected read, but it is not
+    // passed on to the assembler anywhere in this function -- confirm whether it is still needed
+    AssemblyReadReversal readRev;
+    getBreakendReads(bp1, isBp1Reversed, refSeq1, isSearchRemoteInsertionReads, remoteReads, readIndex, reads);
+    // every read collected so far came from bp1:
+    readRev.resize(reads.size(),isBp1Reversed);
+    getBreakendReads(bp2, isBp2Reversed, refSeq2, isSearchRemoteInsertionReads, remoteReads, readIndex, reads);
+    // all reads appended by the second call came from bp2:
+    readRev.resize(reads.size(),isBp2Reversed);
+    AssemblyReadOutput readInfo;
+
+#ifdef ITERATIVE_ASSEMBLER
+    runIterativeAssembler(_assembleOpt, reads, readInfo, as);
+#else
+    runSmallAssembler(_assembleOpt, reads, readInfo, as);
+#endif
+}
diff --git a/src/c++/lib/manta/SVCandidateAssembler.hh b/src/c++/lib/manta/SVCandidateAssembler.hh
new file mode 100644
index 0000000..9d9d3b5
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateAssembler.hh
@@ -0,0 +1,129 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include "applications/GenerateSVCandidates/GSCOptions.hh"
+#include "assembly/IterativeAssembler.hh"
+#include "assembly/SmallAssembler.hh"
+#include "blt_util/time_util.hh"
+#include "htsapi/bam_streamer.hh"
+#include "manta/ChromDepthFilterUtil.hh"
+#include "manta/SVCandidate.hh"
+#include "manta/SVCandidateAssemblyData.hh"
+#include "manta/SVCandidateSetData.hh"
+#include "manta/SVLocusScanner.hh"
+
+#include <vector>
+
+
+//#define ITERATIVE_ASSEMBLER
+#ifdef ITERATIVE_ASSEMBLER
+typedef IterativeAssemblerOptions AssemblerOptions;
+#else
+typedef SmallAssemblerOptions AssemblerOptions;
+#endif
+
+/// Assembles SV-candidate reads for single and paired SVBreakend objects
+///
+struct SVCandidateAssembler
+{
+    /// \param[in] scanOpt read scanning options, copied into this object
+    /// \param[in] assembleOpt assembler options, copied into this object
+    /// \param[in] alignFileOpt provides the alignment filenames and their tumor/normal labels
+    /// \param[in] statsFilename passed on to the read scanner
+    /// \param[in] chromDepthFilename passed on to the chromosome depth filters
+    /// \param[in] bamHeader passed on to the chromosome depth filters
+    /// \param[in] counts per-sample read counts used to compute background remote read rates
+    /// \param[in] isRNA passed on to the read scanner
+    /// \param[in,out] remoteTime tracker for the time spent recovering remote reads, held by reference
+    SVCandidateAssembler(
+        const ReadScannerOptions& scanOpt,
+        const AssemblerOptions& assembleOpt,
+        const AlignmentFileOptions& alignFileOpt,
+        const std::string& statsFilename,
+        const std::string& chromDepthFilename,
+        const bam_header_info& bamHeader,
+        const AllCounts& counts,
+        const bool isRNA,
+        TimeTracker& remoteTime);
+
+    /**
+     * @brief Performs a de-novo assembly of a set of reads crossing a breakpoint.
+     *
+     * Iterates over a range of word lengths until the first successful assembly.
+     *
+     * If unused reads remain, the assembly is re-started using this subset.
+     */
+    void
+    assembleSingleSVBreakend(
+        const SVBreakend& bp,
+        const reference_contig_segment& refSeq,
+        const bool isSearchRemoteInsertionReads,
+        RemoteReadCache& remoteReads,
+        Assembly& as) const;
+
+    /// Assemble the reads collected from two breakends as a single read pool
+    ///
+    /// \param[in] isBp1Reversed reversal state used when collecting the reads of bp1
+    /// \param[in] isBp2Reversed reversal state used when collecting the reads of bp2
+    /// \param[out] as assembly result
+    void
+    assembleSVBreakends(
+        const SVBreakend& bp1,
+        const SVBreakend& bp2,
+        const bool isBp1Reversed,
+        const bool isBp2Reversed,
+        const reference_contig_segment& refSeq1,
+        const reference_contig_segment& refSeq2,
+        Assembly& as) const;
+
+    /// \return the assembler options held by this object
+    const AssemblerOptions&
+    getAssembleOpt() const
+    {
+        return _assembleOpt;
+    }
+
+
+    /// map from a read key string to an unsigned read index
+    typedef std::map<std::string,unsigned> ReadIndexType;
+
+private:
+    typedef std::shared_ptr<bam_streamer> streamPtr;
+
+    /// Collect reads crossing an SV breakpoint and add them to 'reads'
+    ///
+    /// \param[in] isReversed if true revcomp all reads on input
+    /// \param[in] refSeq this is used to find reads which have poorly aligned ends, such reads are added to the breakend assembly pool
+    /// \param[in] isSearchRemoteInsertionReads if true search the remote end of chimeric pairs for MAPQ0 insertion support
+    /// \param[out] remoteReadsCache stores any discovered remote reads so that these can be reused during scoring
+    /// \param[out] reads collected breakend assembly candidate reads
+    void
+    getBreakendReads(
+        const SVBreakend& bp,
+        const bool isReversed,
+        const reference_contig_segment& refSeq,
+        const bool isSearchRemoteInsertionReads,
+        RemoteReadCache& remoteReadsCache,
+        ReadIndexType& readIndex,
+        AssemblyReadInput& reads) const;
+
+    const ReadScannerOptions _scanOpt;
+    const AssemblerOptions _assembleOpt;
+
+    /// tumor/normal label of each input alignment file
+    const std::vector<bool> _isAlignmentTumor;
+
+    /// chromosome depth filters, one for regular reads and one for remote reads
+    const ChromDepthFilterUtil _dFilter;
+    const ChromDepthFilterUtil _dFilterRemoteReads;
+
+    // contains functions to detect/classify anomalous reads
+    SVLocusScanner _readScanner;
+
+    /// one bam stream per input alignment file
+    std::vector<streamPtr> _bamStreams;
+    TimeTracker& _remoteTime;
+
+    /// one background remote read rate per input alignment file
+    std::vector<double> _sampleBackgroundRemoteRate;
+};
diff --git a/src/c++/lib/manta/SVCandidateAssemblyData.cpp b/src/c++/lib/manta/SVCandidateAssemblyData.cpp
new file mode 100644
index 0000000..cbdaa16
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateAssemblyData.cpp
@@ -0,0 +1,38 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+
+#include "manta/SVCandidateAssemblyData.hh"
+
+#include <iostream>
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const LargeInsertionInfo& lii)
+{   // debug dump: writes every field of lii as a single newline-terminated line
+    os << "LargeInsertionInfo: isLeft/Right: " << lii.isLeftCandidate << "/" << lii.isRightCandidate
+       << " contigOffset: " << lii.contigOffset << " refOffset: " << lii.refOffset << " score: " << lii.score << "\n";
+    return os; // hand the stream back so that insertions can be chained
+}
diff --git a/src/c++/lib/manta/SVCandidateAssemblyData.hh b/src/c++/lib/manta/SVCandidateAssemblyData.hh
new file mode 100644
index 0000000..d5ee120
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateAssemblyData.hh
@@ -0,0 +1,181 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+
+#pragma once
+
+#include "assembly/AssembledContig.hh"
+#include "alignment/GlobalAligner.hh"
+#include "alignment/GlobalJumpAligner.hh"
+#include "blt_util/reference_contig_segment.hh"
+#include "manta/SVCandidate.hh"
+
+#include <iosfwd>
+#include <unordered_map>
+#include <vector>
+
+
+/// minimum set of information required to describe bp transformations between SVCandidate and its
+/// corresponding contig alignment
+///
+struct BPOrientation
+{
+    void
+    clear() // restore every flag to the same value as its in-class initializer
+    {
+        isBp2AlignedFirst=false;
+        isBp1Reversed=false;
+        isBp2Reversed=false;
+        isBp1First=true;
+        isStranded=false;
+    }
+
+    bool isBp2AlignedFirst = false; ///< should the contig on the fwd strand align bp2->bp1 (true) or bp1->bp2 (false)
+    bool isBp1Reversed = false; ///< should all bp1 reads be reversed for the contig to assemble correctly?
+    bool isBp2Reversed = false; ///< should all bp2 reads be reversed for the contig to assemble correctly?
+    bool isBp1First = true; ///< Is this candidate oriented from bp1 to bp2 (used in RNA)? Valid if isStranded==true
+    bool isStranded = false; ///< Do we know the strand for this candidate (RNA)
+};
+
+
+
+struct LargeInsertionInfo // fields describing one large-insertion candidate search result
+{
+    void
+    clear() // restore every field to the same value as its in-class initializer
+    {
+        isLeftCandidate=false;
+        isRightCandidate=false;
+        contigOffset=0;
+        refOffset=0;
+        score=0;
+    }
+
+    bool isLeftCandidate = false; ///< presumably: is this a candidate for the left side of a large insertion? -- TODO confirm
+    bool isRightCandidate = false; ///< presumably: is this a candidate for the right side of a large insertion? -- TODO confirm
+    unsigned contigOffset = 0; ///< if candidate, how far into the contig is the breakend?
+    unsigned refOffset = 0; ///< if candidate, how far from the start of the contig alignment is the breakend on reference?
+    int score = 0; ///< what is the alignment score of the contig up to the insertion breakpoint?
+};
+
+std::ostream&
+operator<<(std::ostream& os, const LargeInsertionInfo& lii);
+
+
+
+struct RemoteReadPayload // value type stored in RemoteReadCache: a read number plus the read's sequence
+{
+    RemoteReadPayload() : // default: readNo 0 and an empty sequence
+        readNo(0)
+    {}
+
+    RemoteReadPayload(
+        const int initReadNo, // NOTE(review): narrowed to uint8_t when stored, values outside 0..255 would wrap
+        const std::string& initReadSeq) :
+        readNo(initReadNo),
+        readSeq(initReadSeq)
+    {}
+
+    uint8_t readNo; ///< read number of the remote read, i.e. the read number matching readSeq
+    std::string readSeq; ///< sequence of the remote read
+};
+
+
+
+typedef std::unordered_map<std::string,RemoteReadPayload> RemoteReadCache;
+
+
+
+/// \brief Assembly data pertaining to a specific SV candidate
+///
+/// Assembly starts from a low-resolution SV candidate. This holds
+/// any persistent data related to the assembly process, such as data
+/// useful for scoring the candidate.
+///
+/// As a future design note -- it may be possible that the candidate is broken
+/// into multiple candidates during assembly-based hypothesis refinement, so
+/// this struct may cover multiple refined candidates (but always only one input
+/// candidate).
+///
+/// Also note that this class is representing both large scale/spanning SV's using the locus 'jump' aligner
+/// and small-scale local assemblies. This should probably be refactored into two parts, but
+/// it's hard to see the right strategy/interface until the scoring modules reach greater maturity
+/// (the scoring modules are the primary non-local consumer of information from this struct)
+///
+struct SVCandidateAssemblyData
+{
+    void
+    clear() // reset every member: containers are emptied, flags and indices go back to their initial values
+    {
+        contigs.clear();
+        isCandidateSpanning=false;
+        isSpanning=false;
+        bporient.clear();
+        extendedContigs.clear();
+        smallSVAlignments.clear();
+        spanningAlignments.clear();
+        smallSVSegments.clear();
+        largeInsertInfo.clear();
+        remoteReads.clear();
+        bestAlignmentIndex=0;
+        bp1ref.clear();
+        bp2ref.clear();
+        svs.clear();
+        isOverlapSkip=false;
+    }
+
+    typedef AlignmentResult<int> SmallAlignmentResultType;
+    typedef JumpAlignmentResult<int> JumpAlignmentResultType;
+
+    typedef std::pair<unsigned,unsigned> CandidateSegmentType; ///< 'segments' only pertain to small-event alignments
+    typedef std::vector<CandidateSegmentType> CandidateSegmentSetType; ///< 'segments' only pertain to small-event alignments
+
+    Assembly contigs; ///< assembled contigs for both breakpoints
+
+    /// note that isCandidateSpanning can be != isSpanning in cases where the breakends are so close that the assembler decides to fuse
+    /// them and treat a spanning hypothesis as non-spanning:
+    bool isCandidateSpanning = false; ///< before assembly, was this a 2-locus event (spanning), or a local-assembly?
+    bool isSpanning = false; ///< at assembly time, was this treated as a 2-locus event (spanning), or a local-assembly?
+
+    BPOrientation bporient;
+
+    std::vector<std::string> extendedContigs; ///< each contig's sequence, extended by padding reference sequence onto each end
+
+    std::vector<SmallAlignmentResultType> smallSVAlignments; ///< contig smallSV alignments, one per contig, may be empty
+    std::vector<JumpAlignmentResultType> spanningAlignments; ///< contig spanning alignments, one per contig, may be empty
+    std::vector<CandidateSegmentSetType> smallSVSegments; ///< list of indel sets, one per small alignment
+
+    std::vector<LargeInsertionInfo> largeInsertInfo; ///< data specific to searching for a large insertion candidate
+
+    RemoteReadCache remoteReads; ///< remote reads retrieved to improve assembly and scoring for this locus
+
+    unsigned bestAlignmentIndex = 0; ///< if non-empty sv candidate set, which contig/alignment produced them?
+
+    // expanded reference regions around the candidate SV breakend regions, for small events we use only bp1ref:
+    reference_contig_segment bp1ref;
+    reference_contig_segment bp2ref;
+
+    std::vector<SVCandidate> svs; ///< summarize candidate refined sv candidates
+
+    /// if true, assembly was skipped for this case because of an overlapping assembly
+    bool isOverlapSkip = false;
+};
diff --git a/src/c++/lib/manta/SVCandidateSetData.cpp b/src/c++/lib/manta/SVCandidateSetData.cpp
new file mode 100644
index 0000000..f99c414
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateSetData.cpp
@@ -0,0 +1,178 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+
+#include "common/Exceptions.hh"
+#include "manta/SVCandidateSetData.hh"
+
+#include <cassert>
+
+#include <iostream>
+#include <sstream>
+
+
+#ifdef DEBUG_SVDATA
+#include "blt_util/log.hh"
+#endif
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVCandidateSetRead& svr)
+{   // debug dump: the bam record, then the read's relative index, each on its own line
+    os << "SVCandidateSetRead: " << svr.bamrec << "\n";
+    os << "Read_index: " << svr.readIndex << "\n";
+    return os;
+}
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVSequenceFragmentAssociation& sva)
+{   // prints the sv index and the text label of the evidence type; no trailing newline is written
+    os << " svindex: " << sva.index << " evidenceType: " << SVEvidenceType::label(sva.evtype);
+    return os;
+}
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVCandidateSetSequenceFragment& svp)
+{   // debug dump of a fragment: its SV associations followed by read1 and read2
+    os << "SVCandidateReadPair svIndices:";
+    for (const SVSequenceFragmentAssociation& sva : svp.svLink)
+    {
+        os << sva << "\n"; // one association per line
+    }
+    os << "\n";
+    os << "\tread1: " << svp.read1;
+    os << "\tread2: " << svp.read2; // note: the supplemental read vectors are not printed
+    return os;
+}
+
+
+
+SVCandidateSetSequenceFragment*
+SVCandidateSetSequenceFragmentSampleGroup::
+getSequenceFragment(
+    const pindex_t::key_type& key)
+{
+    const pindex_t::const_iterator kiter(_pairIndex.find(key)); // _pairIndex maps a read name to a position in _pairs
+
+    if (kiter == _pairIndex.end())
+    {
+        /// don't add more pairs to the object once it's full:
+        if (isFull()) return nullptr;
+
+        _pairIndex[key] = _pairs.size(); // the new fragment will occupy the next free position of _pairs
+        _pairs.emplace_back();
+        return &(_pairs.back()); // NOTE: points into the _pairs vector, so a later insertion may invalidate it
+    }
+    else
+    {
+        return &(_pairs[kiter->second]); // name already registered: hand back its existing fragment
+    }
+}
+
+
+
+void
+SVCandidateSetSequenceFragmentSampleGroup::
+add(const bam_record& bamRead,
+    const bool isExpectRepeat,
+    const bool isNode1,
+    const bool isSubMapped)
+{
+    using namespace illumina::common;
+
+#ifdef DEBUG_SVDATA
+    log_os << "SVDataGroup adding: " << bamRead << "\n";
+#endif
+
+    SVCandidateSetSequenceFragment* fragPtr(getSequenceFragment(bamRead.qname())); // find or create the fragment for this read name
+    if (nullptr == fragPtr) return; // group is full and the name is new: the read is silently dropped
+
+    SVCandidateSetSequenceFragment& fragment(*fragPtr);
+
+    SVCandidateSetRead* targetReadPtr(nullptr); // slot within the fragment which will receive bamRead
+    if (2 == bamRead.read_no())
+    {
+        if (bamRead.isNonStrictSupplement())
+        {
+            fragment.read2Supplemental.emplace_back(); // every supplemental record gets a freshly appended slot
+            targetReadPtr = (&(fragment.read2Supplemental.back()));
+        }
+        else
+        {
+            targetReadPtr = (&(fragment.read2));
+        }
+    }
+    else // any read_no() value other than 2 is stored on the read1 side
+    {
+        if (bamRead.isNonStrictSupplement())
+        {
+            fragment.read1Supplemental.emplace_back(); // every supplemental record gets a freshly appended slot
+            targetReadPtr = (&(fragment.read1Supplemental.back()));
+        }
+        else
+        {
+            targetReadPtr = (&(fragment.read1));
+        }
+    }
+
+    SVCandidateSetRead& targetRead(*targetReadPtr);
+    if (targetRead.isSet()) // the slot already holds a bam record
+    {
+        if (isExpectRepeat) return; // repeats are tolerated when the caller expects them: the stored record is kept
+
+        std::ostringstream oss;
+        oss << "Unexpected read name collision.\n"
+            << "\tExisting read: " << targetRead << "\n"
+            << "\tNew read: " << bamRead << "\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+
+    targetRead.bamrec = bamRead;
+    targetRead.isNode1 = isNode1;
+    targetRead.isSubMapped = isSubMapped;
+    targetRead.readIndex = (isSubMapped ? _subMappedReadIndex : _mappedReadIndex); // current value of the counter matching the read's mapping status
+}
+
+
+
+bool
+SVCandidateSetData::
+setNewSearchInterval(const GenomeInterval& newSearch)
+{
+    bool retval(false); // becomes true if newSearch intersects any previously recorded interval
+    for (const GenomeInterval& oldSearch : _searchIntervals)
+    {
+        if (oldSearch.isIntersect(newSearch))
+        {
+            retval=true;
+            break; // one intersection is enough
+        }
+    }
+    _searchIntervals.push_back(newSearch); // the interval is recorded whether or not it overlapped
+    return retval;
+}
diff --git a/src/c++/lib/manta/SVCandidateSetData.hh b/src/c++/lib/manta/SVCandidateSetData.hh
new file mode 100644
index 0000000..a2ec162
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateSetData.hh
@@ -0,0 +1,315 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "alignment/Alignment.hh"
+#include "htsapi/bam_record.hh"
+#include "manta/SVBreakend.hh"
+#include "svgraph/GenomeInterval.hh"
+
+#include <iosfwd>
+#include <map>
+#include <vector>
+
+//#define DEBUG_SVDATA
+
+#ifdef DEBUG_SVDATA
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+
+/// A read associated with an SV-associated set of regions
+///
+/// note this read could be linked with zero to many specific SVCandidates
+///
+struct SVCandidateSetRead
+{
+    bool
+    isSet() const // true once a non-empty bam record has been stored in this object
+    {
+        return (! bamrec.empty());
+    }
+
+    bool
+    isAnchored() const // true if the read is set and is not flagged as sub-mapped
+    {
+        return (isSet() && (!isSubMapped));
+    }
+
+    //realignment info, etc...
+    bam_record bamrec;
+
+    /// used to link this read to node1 or node2 in the original graph ordering,
+    /// note this is not the same as read1 and read2
+    bool isNode1 = true;
+
+    /// is mapq below the minimum normally required to use this read
+    bool isSubMapped = false;
+
+    /// relative index of this read compared to all reads with the same mapping status in bam-input order
+    double readIndex = 0;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVCandidateSetRead& svr);
+
+
+/// capture details of the link between a sequence fragment and an SV
+struct SVSequenceFragmentAssociation
+{
+    typedef uint16_t index_t; ///< storage type of the sv index
+
+    explicit
+    SVSequenceFragmentAssociation(
+        const index_t initIndex = 0,
+        const SVEvidenceType::index_t initEvtype = SVEvidenceType::UNKNOWN) :
+        index(initIndex),
+        evtype(initEvtype)
+    {}
+
+    index_t index; ///< printed as 'svindex' by operator<<; presumably the index of the SV within the candidate set -- TODO confirm
+
+    /// is the association from anom read pair, split read, CIGAR, etc?
+    SVEvidenceType::index_t evtype;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVSequenceFragmentAssociation& sva);
+
+
+/// A DNA/RNA fragment associated with an SV-associated set of regions
+///
+/// note this fragment could be linked with zero to many specific SVCandidates
+///
+struct SVCandidateSetSequenceFragment
+{
+    const char*
+    qname() const // read name taken from the first record found to be set; nullptr if no record is set
+    {
+        if      (read1.isSet()) return read1.bamrec.qname();
+        else if (read2.isSet()) return read2.bamrec.qname();
+        else if ((! read1Supplemental.empty()) && read1Supplemental.front().isSet())
+        {
+            return read1Supplemental.front().bamrec.qname();
+        }
+        else if ((! read2Supplemental.empty()) && read2Supplemental.front().isSet())
+        {
+            return read2Supplemental.front().bamrec.qname();
+        }
+        return nullptr;
+    }
+
+    bool
+    isAnchored() const // true if either primary read is anchored; supplemental reads are not consulted
+    {
+        return (read1.isAnchored() || read2.isAnchored());
+    }
+
+
+    bool
+    checkReadPair() const // consistency check; trivially true unless both read1 and read2 are set
+    {
+        if (read1.isSet() && read2.isSet())
+        {
+            // each read's own target/position/strand must equal what the other read records for its mate:
+            if (read1.bamrec.target_id() != read2.bamrec.mate_target_id()) return false;
+            if (read2.bamrec.target_id() != read1.bamrec.mate_target_id()) return false;
+            if (read1.bamrec.pos() != read2.bamrec.mate_pos()) return false;
+            if (read2.bamrec.pos() != read1.bamrec.mate_pos()) return false;
+            if (read1.bamrec.is_fwd_strand() != read2.bamrec.is_mate_fwd_strand()) return false;
+            if (read2.bamrec.is_fwd_strand() != read1.bamrec.is_mate_fwd_strand()) return false;
+        }
+
+        return true;
+    }
+
+
+
+    std::vector<SVSequenceFragmentAssociation> svLink; ///< which SVs from the set is this molecule associated with?
+    SVCandidateSetRead read1;
+    std::vector<SVCandidateSetRead> read1Supplemental;
+    SVCandidateSetRead read2;
+    std::vector<SVCandidateSetRead> read2Supplemental;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVCandidateSetSequenceFragment& svp);
+
+
+
+/// SVCandidateSet data associated with a specific bam-file/read-group
+///
+struct SVCandidateSetSequenceFragmentSampleGroup
+{
+    typedef std::vector<SVCandidateSetSequenceFragment> pair_t;
+    typedef pair_t::iterator iterator;
+    typedef pair_t::const_iterator const_iterator;
+
+    /// increment once for each eligible read considered
+    ///
+    /// this information is used to determine signal rate
+    /// so that it can be compared to sample specific noise
+    /// levels
+    void
+    increment(
+        const bool /*isNode1*/,
+        const bool isSubMapped)
+    {
+        if (isSubMapped)
+        {
+            _subMappedReadIndex++;
+        }
+        else
+        {
+            _mappedReadIndex++;
+        }
+    }
+
+    /// add a new bam record to the set:
+    void
+    add(const bam_record& bamRead,
+        const bool isExpectRepeat,
+        const bool isNode1,
+        const bool isSubMapped);
+
+    iterator
+    begin()
+    {
+        return _pairs.begin();
+    }
+
+    iterator
+    end()
+    {
+        return _pairs.end();
+    }
+
+    const_iterator
+    begin() const
+    {
+        return _pairs.begin();
+    }
+
+    const_iterator
+    end() const
+    {
+        return _pairs.end();
+    }
+
+    unsigned
+    size() const // number of fragments currently stored
+    {
+        return _pairs.size();
+    }
+
+    bool
+    isFull() const
+    {
+        return _isFull;
+    }
+
+    void
+    setFull() // one-way switch: nothing in this struct resets the flag
+    {
+        _isFull = true;
+    }
+
+private:
+    typedef std::string bamqname_t;
+    typedef std::map<bamqname_t,unsigned> pindex_t; ///< read name -> position of its fragment in _pairs
+
+    /// get existing fragment or return pointer for a new fragment
+    ///
+    /// this will return null for new fragments when isFull() is true
+    ///
+    SVCandidateSetSequenceFragment*
+    getSequenceFragment(const pindex_t::key_type& key);
+
+    pair_t _pairs;
+    pindex_t _pairIndex;
+
+    bool _isFull = false; ///< this flag can be set if the object grows too large to insert more data into it
+
+    /// Tracks the relative index of all mapped reads as read off of the input bam file. This is used to provide a
+    /// relative index number for all reads supporting a particular SV candidate, so that supporting read density
+    /// can be estimated. For instance, if 3 reads supporting a breakpoint have mapped read counts of {100,200,300},
+    /// we can roughly estimate that 1/100 reads support the breakpoint.
+    double _mappedReadIndex = 0;
+
+    /// same as above for reads with mapping quality below threshold
+    double _subMappedReadIndex = 0;
+};
+
+
+/// Data gathered from a set of regions implicated to contain one or more SVs
+///
+/// Note these data are used for initial hypothesis generation, therefore the
+/// reads are potentially associated with zero to many specific SV candidates
+/// (although we expect any one read to usually be associated with no more
+/// than one).
+///
+struct SVCandidateSetData
+{
+    /// get evidence associated with a specific sample group, creating an empty group if none exists yet:
+    SVCandidateSetSequenceFragmentSampleGroup&
+    getDataGroup(const unsigned bamIndex)
+    {
+        data_t::iterator diter(_data.find(bamIndex));
+        if (diter != _data.end()) return diter->second;
+
+        std::pair<data_t::iterator,bool> diter2 = _data.insert(std::make_pair(bamIndex,SVCandidateSetSequenceFragmentSampleGroup()));
+        return diter2.first->second;
+    }
+
+    /// get evidence associated with a specific sample group (the group must already exist):
+    const SVCandidateSetSequenceFragmentSampleGroup&
+    getDataGroup(const unsigned bamIndex) const
+    {
+        data_t::const_iterator diter(_data.find(bamIndex));
+        assert(diter != _data.end()); // existence is only verified when asserts are compiled in
+        return diter->second;
+    }
+
+    void
+    clear() // drop all sample groups and all recorded search intervals
+    {
+        _data.clear();
+        _searchIntervals.clear();
+    }
+
+    /// return true if this search interval overlaps with any previous
+    /// intervals
+    ///
+    /// if true, we know to expect repeated qnames
+    ///
+    bool
+    setNewSearchInterval(const GenomeInterval& newSearch);
+
+private:
+    typedef std::map<unsigned,SVCandidateSetSequenceFragmentSampleGroup> data_t; ///< bam index -> sample group
+    data_t _data;
+
+    std::vector<GenomeInterval> _searchIntervals;
+};
diff --git a/src/c++/lib/manta/SVCandidateUtil.cpp b/src/c++/lib/manta/SVCandidateUtil.cpp
new file mode 100644
index 0000000..b62b75c
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateUtil.cpp
@@ -0,0 +1,140 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/SVCandidateUtil.hh"
+
+
+
+bool
+isSVBelowMinSize(
+    const SVCandidate& sv,
+    const unsigned minSize)
+{
+    if (sv.bp1.interval.tid != sv.bp2.interval.tid) return false; // breakends on different tids are never treated as below the minimum size
+
+    const pos_t bpSize(std::abs(sv.bp1.interval.range.center_pos() - sv.bp2.interval.range.center_pos())-1); // distance between the breakend centers, minus one
+    const pos_t insertSize(sv.insertSeq.size());
+
+    return (std::max(bpSize,insertSize) < static_cast<pos_t>(minSize)); // the larger of the two sizes decides
+}
+
+bool
+isCis(const SVCandidate& sv)
+{
+    if (sv.bp1.interval.tid != sv.bp2.interval.tid) return false; // must be on the same tid
+    if (isSameOrientation(sv.bp1.state, sv.bp2.state)) return false; // breakends must have different orientations
+    const bool bp1Left(sv.bp1.interval.range.center_pos() < sv.bp2.interval.range.center_pos()); // false when the centers coincide
+    if ((sv.bp1.state == SVBreakendState::RIGHT_OPEN) && bp1Left) return true;
+    if ((sv.bp1.state == SVBreakendState::LEFT_OPEN) && !bp1Left) return true;
+    return false;
+}
+
+SV_TYPE::index_t
+getSVType(const SVCandidate& sv)
+{
+    using namespace SV_TYPE;
+
+    // remove failed local assemblies first:
+    if ((sv.bp1.state == SVBreakendState::UNKNOWN) || (sv.bp2.state == SVBreakendState::UNKNOWN))
+    {
+        return UNKNOWN;
+    }
+
+    const bool isBp1First(sv.bp1.interval.range.begin_pos() <= sv.bp2.interval.range.begin_pos());
+    const bool isBp2First(sv.bp2.interval.range.begin_pos() <= sv.bp1.interval.range.begin_pos()); // both flags are true when the begin positions are equal
+
+    if (sv.bp1.interval.tid != sv.bp2.interval.tid)
+    {
+        return INTERTRANSLOC;
+    }
+    else if (SVBreakendState::isSameOrientation(sv.bp1.state,sv.bp2.state))
+    {
+        return INVERSION;
+    }
+    else if (isBp1First || isBp2First) // NOTE(review): both flags use <=, so at least one of them is always true here
+    {
+        if (isInnies(isBp1First,sv.bp1.state,sv.bp2.state))
+        {
+            return INDEL;
+        }
+        else if (isOutties(isBp1First,sv.bp1.state,sv.bp2.state))
+        {
+            return TANDUP;
+        }
+    }
+
+    return UNKNOWN; // same-tid, opposite-orientation case which is neither 'innies' nor 'outties'
+}
+
+
+
+namespace EXTENDED_SV_TYPE
+{
+
+/// is an indel classified as insert or delete?
+static
+index_t
+classifyIndel(
+    const SVCandidate& sv)
+{
+    const bool isBp1First(sv.bp1.interval.range.begin_pos()<=sv.bp2.interval.range.begin_pos());
+
+    const SVBreakend& bpA(isBp1First ? sv.bp1 : sv.bp2); // breakend with the lower (or equal) begin position
+    const SVBreakend& bpB(isBp1First ? sv.bp2 : sv.bp1); // the other breakend
+
+    const unsigned deleteSize(bpB.interval.range.begin_pos() - bpA.interval.range.begin_pos()); // non-negative because of the ordering above
+    const unsigned insertSize(sv.insertSeq.size());
+
+    return ((deleteSize >= insertSize) ? DELETE : INSERT); // ties are classified as DELETE
+}
+
+}
+
+
+
+EXTENDED_SV_TYPE::index_t
+getExtendedSVType(
+    const SVCandidate& sv,
+    const bool isForceIntraChromBnd)
+{
+    using namespace EXTENDED_SV_TYPE;
+
+    const SV_TYPE::index_t svType(getSVType(sv));
+
+    if (svType == SV_TYPE::INTERTRANSLOC) return INTERTRANSLOC; // takes precedence over isForceIntraChromBnd
+
+    if (isForceIntraChromBnd) return INTRATRANSLOC; // caller request: report every other type (UNKNOWN included) as INTRATRANSLOC
+
+    switch (svType)
+    {
+    case SV_TYPE::INVERSION:
+        return INVERSION;
+    case SV_TYPE::TANDUP:
+        return TANDUP;
+    case SV_TYPE::INDEL:
+        return classifyIndel(sv); // split INDEL into INSERT or DELETE
+    default:
+        return UNKNOWN;
+    }
+}
diff --git a/src/c++/lib/manta/SVCandidateUtil.hh b/src/c++/lib/manta/SVCandidateUtil.hh
new file mode 100644
index 0000000..d33899f
--- /dev/null
+++ b/src/c++/lib/manta/SVCandidateUtil.hh
@@ -0,0 +1,196 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVCandidate.hh"
+
+
+/// returns true if sv is below minimum size:
+///
+bool
+isSVBelowMinSize(
+    const SVCandidate& sv,
+    const unsigned minSize);
+
+/// returns true if the sv is in cis orientation, i.e. same chromosome
+/// and a right open breakend to the left of a left open breakend
+///
+bool
+isCis(const SVCandidate& sv);
+
+namespace SV_TYPE
+{
+enum index_t // coarse SV classification, see getSVType()
+{
+    UNKNOWN,
+    INTERTRANSLOC,
+    INVERSION,
+    INDEL,
+    TANDUP,
+    COMPLEX
+};
+
+inline
+const char*
+label(const index_t idx) // text label for idx: the enumerator's own name, or "UNKNOWN" for an unrecognized value
+{
+    switch (idx)
+    {
+    case UNKNOWN:
+        return "UNKNOWN";
+    case INTERTRANSLOC:
+        return "INTERTRANSLOC";
+    case INVERSION:
+        return "INVERSION";
+    case INDEL:
+        return "INDEL";
+    case TANDUP:
+        return "TANDUP";
+    case COMPLEX:
+        return "COMPLEX";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+}
+
+
+SV_TYPE::index_t
+getSVType(const SVCandidate& sv);
+
+
+
+
+/// extended SV_TYPE is like SV_TYPE but separates INDEL into INSERT and DELETE states
+namespace EXTENDED_SV_TYPE
+{
+
+enum index_t
+{
+    UNKNOWN,
+    INTERTRANSLOC,
+    INTRATRANSLOC,
+    INVERSION,
+    INSERT,
+    DELETE,
+    TANDUP
+};
+
+inline
+bool
+isSVTransloc(const index_t idx) // true for INTERTRANSLOC and INTRATRANSLOC only
+{
+    switch (idx)
+    {
+    case INTERTRANSLOC:
+    case INTRATRANSLOC:
+        return true;
+    default:
+        return false;
+    }
+}
+
+inline
+bool
+isSVIndel(const index_t idx) // true for INSERT and DELETE only
+{
+    switch (idx)
+    {
+    case INSERT:
+    case DELETE:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// provide a shortened label (mostly from the VCF spec)
+inline
+const char*
+label(const index_t idx)
+{
+    switch (idx)
+    {
+    case INTERTRANSLOC:
+        return "BND";
+    case INTRATRANSLOC:
+        return "BND"; // both translocation types share the same label
+    case INVERSION:
+        return "INV";
+    case INSERT:
+        return "INS";
+    case DELETE:
+        return "DEL";
+    case TANDUP:
+        return "DUP:TANDEM";
+    default:
+        return "UNKNOWN"; // also reached for the UNKNOWN enumerator, which has no case of its own
+    }
+}
+}
+
+EXTENDED_SV_TYPE::index_t
+getExtendedSVType(
+    const SVCandidate& sv,
+    const bool isForceIntraChromBnd = false);
+
+
+/// a 'spanning' sv means that this is a 'normal' breakend, where we have a
+/// hypothesis for the region and orientation of each end of the breakend
+///
+/// a 'spanning' sv type stands in contrast to a 'complex' sv type as described
+/// in the 'isComplexSV' function below
+inline
+bool
+isSpanningSV(const SVCandidate& sv)
+{
+    using namespace SVBreakendState;
+    return (isSimpleBreakend(sv.bp1.state) && isSimpleBreakend(sv.bp2.state)); // both breakends must be in a 'simple' state
+}
+
+
+/// complex in this case means that we have no specific hypothesis for the SV --
+/// it is just a single genomic region for which we schedule local assembly
+///
+inline
+bool
+isComplexSV(const SVCandidate& sv)
+{
+    using namespace SVBreakendState;
+    return ((sv.bp1.state == COMPLEX) && (sv.bp2.state == UNKNOWN)); // only bp1 carries the region, bp2 must be UNKNOWN
+}
+
+/// returns 0 if not a deletion, otherwise the distance between the begin positions of the two breakend ranges
+inline
+unsigned
+getDeleteSize(
+    const SVCandidate& sv)
+{
+    const EXTENDED_SV_TYPE::index_t svType(getExtendedSVType(sv)); // isForceIntraChromBnd takes its default value (false)
+    if (svType != EXTENDED_SV_TYPE::DELETE) return 0;
+    return std::abs(sv.bp1.interval.range.begin_pos() - sv.bp2.interval.range.begin_pos()); // NOTE(review): std::abs is only available here through a transitive include
+}
+
diff --git a/src/c++/lib/manta/SVLocusEvidenceCount.hh b/src/c++/lib/manta/SVLocusEvidenceCount.hh
new file mode 100644
index 0000000..7633292
--- /dev/null
+++ b/src/c++/lib/manta/SVLocusEvidenceCount.hh
@@ -0,0 +1,84 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+
+/// enumerate evidence type estimated on input for each sample
+struct SVLocusEvidenceCount
+{
+    void
+    clear() // reset every counter to zero
+    {
+        total = 0;
+        ignored = 0;
+        anom = 0;
+        split = 0;
+        indel = 0;
+        assm = 0;
+        remoteRecoveryCandidates = 0;
+    }
+
+    void
+    merge( // add each counter of rhs onto the matching counter of this object
+        const SVLocusEvidenceCount& rhs)
+    {
+        total += rhs.total;
+        ignored += rhs.ignored;
+        anom += rhs.anom;
+        split += rhs.split;
+        indel += rhs.indel;
+        assm += rhs.assm;
+        remoteRecoveryCandidates += rhs.remoteRecoveryCandidates;
+    }
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& total& ignored& anom& split& indel& assm& remoteRecoveryCandidates; // field order here defines the archive layout
+    }
+
+    // using doubles for integral counts here because (1) counts are potentially very high and (2) exact counts don't matter
+
+    /// total number of non-filtered reads scanned -- presumably all reads, not only anomalous ones; TODO confirm
+    double total = 0;
+
+    /// total number of non-filtered reads ignored for SV purposes
+    double ignored = 0;
+
+    /// total number of non-filtered anomalous reads scanned
+    double anom = 0;
+
+    /// total number of non-filtered split (SA-tag) reads scanned
+    double split = 0;
+
+    /// total number of non-filtered CIGAR large indel reads scanned
+    double indel = 0;
+
+    /// total number of non-filtered semi-aligned reads scanned
+    double assm = 0;
+
+    /// subset of anom. these are reads which qualify as candidates for remote recovery
+    double remoteRecoveryCandidates = 0;
+};
diff --git a/src/c++/lib/manta/SVLocusScanner.cpp b/src/c++/lib/manta/SVLocusScanner.cpp
new file mode 100644
index 0000000..51ad318
--- /dev/null
+++ b/src/c++/lib/manta/SVLocusScanner.cpp
@@ -0,0 +1,1445 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+/// \author Ole Schulz-Trieglaff
+/// \author Bret Barnes
+///
+
+#include "blt_util/align_path_util.hh"
+#include "blt_util/parse_util.hh"
+#include "blt_util/string_util.hh"
+#include "common/Exceptions.hh"
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/bam_record_util.hh"
+#include "htsapi/SimpleAlignment_bam_util.hh"
+#include "manta/RemoteMateReadUtil.hh"
+#include "manta/SVCandidateUtil.hh"
+#include "manta/SVLocusScanner.hh"
+#include "manta/SVLocusScannerSemiAligned.hh"
+
+#include <iostream>
+
+//#define DEBUG_SCANNER
+
+//#define DEBUG_IS_SHADOW
+
+#ifdef DEBUG_SCANNER
+#include "blt_util/log.hh"
+#endif
+
+
+
+/// Classification of fragments by size, so that fragments in different
+/// size classes can be treated differently
+///
+namespace FragmentSizeType
+{
+/// fragments within this factor of the minimum size cutoff are treated as 'close' pairs and receive a modified evidence count
+static const float closePairFactor(4);
+
+/// fragments within this factor of the minimum size cutoff are treated as 'reallyClose' pairs and receive a modified evidence count
+static const float veryClosePairFactor(1.5);
+
+static const float maxNormalFactor(1.5);
+
+static const float minLargeEventRegionFactor(10);
+static const float maxLargeEventRegionFactor(20);
+
+
+/// Map a fragment size onto a size class
+///
+/// Sizes below the read group's proper-pair minimum are COMPRESSED, sizes
+/// not above the proper-pair maximum are NORMAL, and larger sizes are CLOSE
+/// or DISTANT depending on the read group's minDistantFragmentSize.
+static
+index_t
+classifySize(
+    const SVLocusScanner::CachedReadGroupStats& rgStats,
+    const int fragmentSize)
+{
+    if (fragmentSize < rgStats.properPair.min) return COMPRESSED;
+
+    const bool isAboveProperPairRange(fragmentSize > rgStats.properPair.max);
+    if (! isAboveProperPairRange) return NORMAL;
+
+    const bool isBelowDistantCutoff(fragmentSize < rgStats.minDistantFragmentSize);
+    if (isBelowDistantCutoff) return CLOSE;
+    return DISTANT;
+}
+
+/// \return true for every size class other than NORMAL and COMPRESSED
+static
+bool
+isLarge(const index_t i)
+{
+    const bool isNotLarge((i == NORMAL) || (i == COMPRESSED));
+    return (! isNotLarge);
+}
+}
+
+
+
+/// Build an SV observation with both breakends on chromosome \p alignTid
+///
+/// For a regular (non-complex) observation, bp1 is a RIGHT_OPEN region
+/// around \p leftPos and bp2 is a LEFT_OPEN region around \p rightPos, and
+/// both breakends record \p svSource as evidence.
+///
+/// For a complex observation, bp1 is a single COMPLEX region running from
+/// before \p leftPos to after \p rightPos, bp2 is left in the UNKNOWN state
+/// and only bp1 records the evidence.
+///
+/// Region begin positions are clamped so they are never negative.
+static
+SVObservation
+GetSplitSVCandidate(
+    const ReadScannerDerivOptions& dopt,
+    const int32_t alignTid,
+    const pos_t leftPos,
+    const pos_t rightPos,
+    const SVEvidenceType::index_t& svSource,
+    const FRAGSOURCE::index_t& fragSource,
+    const bool isComplex = false)
+{
+    SVObservation sv;
+    sv.evtype = svSource;
+    sv.fragSource = fragSource;
+
+    SVBreakend& bp1(sv.bp1);
+    SVBreakend& bp2(sv.bp2);
+
+    bp1.interval.tid = alignTid;
+    bp2.interval.tid = alignTid;
+
+    bp1.lowresEvidence.add(svSource);
+    bp1.interval.range.set_begin_pos(std::max(0,leftPos-dopt.beforeBreakend));
+
+    if (isComplex)
+    {
+        bp1.state = SVBreakendState::COMPLEX;
+        bp2.state = SVBreakendState::UNKNOWN;
+        bp1.interval.range.set_end_pos(rightPos+dopt.afterBreakend);
+    }
+    else
+    {
+        bp2.lowresEvidence.add(svSource);
+        bp1.state = SVBreakendState::RIGHT_OPEN;
+        bp2.state = SVBreakendState::LEFT_OPEN;
+        bp1.interval.range.set_end_pos(leftPos+dopt.afterBreakend);
+    }
+
+    bp2.interval.range.set_begin_pos(std::max(0,rightPos-dopt.beforeBreakend));
+    bp2.interval.range.set_end_pos(rightPos+dopt.afterBreakend);
+
+    return sv;
+}
+
+
+
+/// Decide, from the clipping in the cigar string, which end of this split
+/// alignment carries the breakpoint
+///
+/// \return true if the breakpoint is on the downstream (right) end, which is
+///         inferred when the trailing clip is larger than the leading clip;
+///         false if it is on the upstream (left) end
+static
+bool
+isSplitOpenDownstream(
+    const ALIGNPATH::path_t& align)
+{
+    using namespace ALIGNPATH;
+    ///TODO replace this heuristic with a better check (looking at all SA alignments at once)
+    const auto leadClipSize(apath_clip_lead_size(align));
+    const auto trailClipSize(apath_clip_trail_size(align));
+    return (trailClipSize > leadClipSize);
+}
+
+
+
+/// Set the state, chromosome and region of \p breakend from one segment of a
+/// split alignment
+///
+/// The match descriptors determine whether the split is upstream (i.e. 5'
+/// assuming fwd strand) of the current alignment (we are clipped on the left
+/// side) or downstream. Everything is relative to the forward strand:
+///
+/// DownStream => RIGHT_OPEN, breakend position is the end of this segment
+/// Upstream   => LEFT_OPEN, breakend position is the start of this segment
+///
+static
+void
+updateSABreakend(
+    const ReadScannerDerivOptions& dopt,
+    const SimpleAlignment& align,
+    SVBreakend& breakend)
+{
+    breakend.interval.tid = align.tid;
+
+    // position of the breakend implied by the split:
+    int pos = align.pos;
+
+    if (isSplitOpenDownstream(align.path))
+    {
+        using namespace ALIGNPATH;
+        breakend.state = SVBreakendState::RIGHT_OPEN;
+        pos += apath_ref_length(align.path);
+    }
+    else
+    {
+        breakend.state = SVBreakendState::LEFT_OPEN;
+    }
+
+    breakend.interval.range.set_begin_pos(std::max(0,pos-dopt.beforeBreakend));
+    breakend.interval.range.set_end_pos(pos+dopt.afterBreakend);
+}
+
+
+
+/// Build an SV observation from a read's own alignment (bp1) and the
+/// alignment described by its SA tag (bp2)
+///
+/// fwReads/rvReads are only updated when dopt.isStranded is set.
+static
+SVObservation
+GetSplitSACandidate(
+    const ReadScannerDerivOptions& dopt,
+    const bam_record& localRead,
+    const SimpleAlignment& localAlign,
+    const SimpleAlignment& remoteAlign,
+    const FRAGSOURCE::index_t fragSource)
+{
+    using namespace SVEvidenceType;
+    static const index_t svSource(SPLIT_ALIGN);
+
+    SVObservation sv;
+    sv.evtype = svSource;
+    sv.fragSource = fragSource;
+
+    // use single-side evidence, have to read the supp read to get the
+    // reverse edge. this protects against double-count:
+    sv.bp1.lowresEvidence.add(svSource);
+
+    updateSABreakend(dopt, localAlign, sv.bp1);
+    updateSABreakend(dopt, remoteAlign, sv.bp2);
+
+    if (! dopt.isStranded) return sv;
+
+    // If the local (bp1) alignment is split downstream (on the right side) then this read goes from bp1 -> bp2.
+    // If it is a forward read (e.g. read1 on + strand), this means it's a forward read for this event.
+    const bool isSplitDownstream(isSplitOpenDownstream(localAlign.path));
+    const bool isReadFw(localRead.is_first() == localRead.is_fwd_strand());
+
+    if (isReadFw == isSplitDownstream)
+    {
+        sv.fwReads += 1;
+    }
+    else
+    {
+        sv.rvReads += 1;
+    }
+    return sv;
+}
+
+
+
+typedef std::map<std::string, int32_t> chromMap_t;
+
+
+
+/// Parse the SA tag of \p bamRead into a list of alignments
+///
+/// \p splitAlign is always cleared first. It is left empty if the read has
+/// no SA tag, or if the tag describes more than one alignment. An alignment
+/// whose MAPQ is below opt.minMapq is skipped.
+///
+/// Each SA entry is read as six comma-separated values, of which this
+/// function uses: [0] chromosome name, [1] 1-based position, [2] strand
+/// ('+' or '-'), [3] cigar string and [4] MAPQ.
+///
+/// The tag content comes from the input alignment file, so it is validated
+/// with exceptions rather than asserts: asserts are compiled out of NDEBUG
+/// builds, where a malformed tag would otherwise index past the end of the
+/// parsed values or dereference an invalid map iterator.
+///
+/// \throw illumina::common::LogicException if an SA entry does not have six
+///        values, has a strand other than '+'/'-', or names a chromosome
+///        missing from \p chromToIndex
+static
+void
+parseSACandidatesFromRead(
+    const ReadScannerOptions& opt,
+    const bam_record& bamRead,
+    const chromMap_t& chromToIndex,
+    std::vector<SimpleAlignment>& splitAlign)
+{
+    using namespace ALIGNPATH;
+
+    splitAlign.clear();
+
+    std::vector<std::string> saVec;
+    {
+        static const char satag[] = {'S','A'};
+        const char* saStr(bamRead.get_string_tag(satag));
+        if (nullptr == saStr) return;
+
+        split_string(saStr, ';', saVec);
+
+        // drop the empty entry produced by a trailing ';'
+        if ( (! saVec.empty()) && saVec.back().empty())
+        {
+            saVec.pop_back();
+        }
+    }
+
+    // Only handle a single split alignment right now.
+    // In the future we could sort the SA tags by order on the template, possibly
+    // also removing segments that map to two different areas,
+
+    if (saVec.size() > 1) return;
+
+    for (const std::string& sa : saVec)
+    {
+#ifdef DEBUG_SCANNER
+        log_os << __FUNCTION__ << ": SA STRING: " << sa << "\n";
+#endif
+        std::vector<std::string> saDat;
+        split_string(sa, ',', saDat);
+
+        if (saDat.size() != 6)
+        {
+            BOOST_THROW_EXCEPTION(illumina::common::LogicException(
+                                      "Unexpected number of SA tag values in SA tag entry: '" + sa + "'"));
+        }
+
+        /// filter split reads with low MAPQ:
+        const unsigned saMapq(illumina::blt_util::parse_unsigned_str(saDat[4]));
+        if (saMapq < opt.minMapq) continue;
+
+        const chromMap_t::const_iterator ci(chromToIndex.find(saDat[0]));
+        if (ci == chromToIndex.end())
+        {
+            BOOST_THROW_EXCEPTION(illumina::common::LogicException(
+                                      "Unknown chromosome name in SA tag entry: '" + sa + "'"));
+        }
+
+        // indexing an empty std::string at 0 yields '\0', which is rejected below
+        const char saStrand(saDat[2][0]);
+        if ((saStrand != '-') && (saStrand != '+'))
+        {
+            BOOST_THROW_EXCEPTION(illumina::common::LogicException(
+                                      "Unexpected strand value in SA tag entry: '" + sa + "'"));
+        }
+
+        splitAlign.emplace_back();
+        SimpleAlignment& sal(splitAlign.back());
+        sal.tid=(ci->second); // convert chr to int32_t via new bam header map
+        sal.pos = (illumina::blt_util::parse_int_str(saDat[1])-1); // SA positions are 1-based
+        sal.is_fwd_strand = (saStrand == '+');
+
+        cigar_to_apath(saDat[3].c_str(), sal.path);
+    }
+}
+
+
+
+/// Append an SV observation to \p candidates for the split alignment
+/// described by the SA tag of \p localRead
+///
+/// Nothing is appended unless the SA tag yields exactly one alignment.
+static
+void
+getSACandidatesFromRead(
+    const ReadScannerOptions& opt,
+    const ReadScannerDerivOptions& dopt,
+    const bam_record& localRead,
+    const SimpleAlignment& localAlign,
+    const FRAGSOURCE::index_t fragSource,
+    const chromMap_t& chromToIndex,
+    std::vector<SVObservation>& candidates)
+{
+    std::vector<SimpleAlignment> remoteAlign;
+    parseSACandidatesFromRead(opt, localRead, chromToIndex, remoteAlign);
+
+    // Only handle a single split alignment right now.
+    // In the future we could sort the SA tags by order on the template, possibly
+    // also removing segments that map to two different areas,
+    if (remoteAlign.size() != 1) return;
+
+    candidates.push_back(GetSplitSACandidate(dopt, localRead, localAlign, remoteAlign.front(), fragSource));
+#ifdef DEBUG_SCANNER
+    log_os << __FUNCTION__ << ": evaluating SA sv for inclusion: " << candidates.back() << "\n";
+#endif
+}
+
+
+
+/// Scan the alignment's cigar string for large indels and append each one
+/// to \p candidates in the internal candidate format
+///
+/// An indel is large when its length is at least opt.minCandidateVariantSize.
+/// Segments outside the range returned by get_match_edge_segments are treated
+/// as edge segments: a large edge insertion produces a complex candidate, any
+/// other edge segment produces nothing.
+///
+/// \throw illumina::common::LogicException if a combined insertion/deletion
+///        starts on an edge segment
+static
+void
+getSVCandidatesFromReadIndels(
+    const ReadScannerOptions& opt,
+    const ReadScannerDerivOptions& dopt,
+    const SimpleAlignment& align,
+    const FRAGSOURCE::index_t fragSource,
+    std::vector<SVObservation>& candidates)
+{
+    using namespace SVEvidenceType;
+    static const index_t svSource(CIGAR);
+
+    using namespace ALIGNPATH;
+
+    // append one candidate for the given breakend positions on the alignment's chromosome
+    const auto addCandidate = [&](const pos_t leftPos, const pos_t rightPos, const bool isComplex)
+    {
+        candidates.push_back(GetSplitSVCandidate(dopt, align.tid, leftPos, rightPos, svSource, fragSource, isComplex));
+    };
+
+    const std::pair<unsigned,unsigned> ends(get_match_edge_segments(align.path));
+    const unsigned pathSize(align.path.size());
+
+    unsigned pathIndex(0);
+    unsigned readOffset(0);
+    pos_t refHeadPos(align.pos);
+
+    while (pathIndex<pathSize)
+    {
+        const path_segment& ps(align.path[pathIndex]);
+        const bool isEdgeSegment((pathIndex<ends.first) || (pathIndex>ends.second));
+
+        // in this case, swap means combined insertion/deletion
+        const bool isSwapStart(is_segment_swap_start(align.path,pathIndex));
+
+        if (isEdgeSegment && isSwapStart)
+        {
+            using namespace illumina::common;
+
+            std::ostringstream oss;
+            oss << "Can't process unexpected alignment pattern: " << align << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+
+        unsigned nPathSegments(1); // number of path segments consumed
+        if (isEdgeSegment)
+        {
+            // edge inserts are allowed for intron adjacent and grouper reads, edge deletions for intron adjacent only
+
+            if ((ps.type == INSERT) && (ps.length >= opt.minCandidateVariantSize))
+            {
+                static const bool isComplex(true);
+                addCandidate(refHeadPos, refHeadPos, isComplex);
+            }
+        }
+        else if (isSwapStart)
+        {
+            const swap_info sinfo(align.path,pathIndex);
+            const bool isLargeSwap(
+                (sinfo.delete_length >= opt.minCandidateVariantSize) ||
+                (sinfo.insert_length >= opt.minCandidateVariantSize));
+            if (isLargeSwap)
+            {
+                addCandidate(refHeadPos, refHeadPos+sinfo.delete_length, false);
+            }
+
+            // the whole swap is consumed in one step
+            nPathSegments = sinfo.n_seg;
+        }
+        else if (is_segment_type_indel(ps.type) && (ps.length >= opt.minCandidateVariantSize))
+        {
+            // regular indel:
+
+            if (ps.type == DELETE)
+            {
+                addCandidate(refHeadPos, refHeadPos+ps.length, false);
+            }
+            else if (ps.type == INSERT)
+            {
+                addCandidate(refHeadPos, refHeadPos, false);
+            }
+        }
+
+        for (unsigned i(0); i<nPathSegments; ++i)
+        {
+            increment_path(align.path, pathIndex, readOffset, refHeadPos);
+        }
+    }
+}
+
+
+
+/// Convert poorly aligned read ends (semi-aligned and/or soft-clipped) to
+/// the internal candidate format and append them to \p candidates
+///
+/// Nothing is appended when the leading and trailing mismatch lengths
+/// together cover the whole read. Otherwise each end whose mismatch length
+/// reaches opt.minSemiAlignedMismatchLen contributes one complex candidate.
+static
+void
+getSVCandidatesFromSemiAligned(
+    const ReadScannerOptions& opt,
+    const ReadScannerDerivOptions& dopt,
+    const bam_record& bamRead,
+    const SimpleAlignment& bamAlign,
+    const FRAGSOURCE::index_t fragSource,
+    const reference_contig_segment& refSeq,
+    std::vector<SVObservation>& candidates)
+{
+    unsigned leadingMismatchLen(0);
+    pos_t leadingRefPos(0);
+    unsigned trailingMismatchLen(0);
+    pos_t trailingRefPos(0);
+    getSVBreakendCandidateSemiAligned(bamRead, bamAlign, refSeq,
+                                      dopt.isUseOverlappingPairs,
+                                      leadingMismatchLen, leadingRefPos,
+                                      trailingMismatchLen, trailingRefPos);
+
+    const unsigned totalMismatchLen(leadingMismatchLen + trailingMismatchLen);
+    if (totalMismatchLen >= bamRead.read_size()) return;
+
+    using namespace SVEvidenceType;
+    static const index_t svSource(SEMIALIGN);
+
+    // semi-aligned reads don't define a full hypothesis, so they're always evidence for a 'complex' ie. undefined, event
+    // in a fashion analogous to clipped reads
+    static const bool isComplex(true);
+
+    // append a candidate at pos if this read end has enough mismatched sequence
+    const auto addIfLongEnough = [&](const unsigned mismatchLen, const pos_t pos)
+    {
+        if (mismatchLen >= opt.minSemiAlignedMismatchLen)
+        {
+            candidates.push_back(GetSplitSVCandidate(dopt,bamRead.target_id(),pos,pos,svSource, fragSource,isComplex));
+        }
+    };
+
+    addIfLongEnough(leadingMismatchLen, leadingRefPos);
+    addIfLongEnough(trailingMismatchLen, trailingRefPos);
+}
+
+
+
+/// local utility class to analyze read pair relationship as lazily as possible
+///
+/// Usage, as enforced by the asserts below: call reset() with the pair's two
+/// alignments, then computeLargeEventRegionScale(), and only if that returns
+/// true call getSVObservation().
+///
+/// The object stores references to the options/stats passed to the
+/// constructor and pointers to the alignments passed to reset(), so all of
+/// these must outlive any use of the analyzer.
+struct AlignmentPairAnalyzer
+{
+    AlignmentPairAnalyzer(
+        const ReadScannerOptions& opt,
+        const ReadScannerDerivOptions& dopt,
+        const SVLocusScanner::CachedReadGroupStats& rstats)
+        : _opt(opt),
+          _dopt(dopt),
+          _rstats(rstats)
+    {}
+
+    /// point the analyzer at a new read pair and discard all cached results
+    ///
+    /// \param isRemoteObserved stored as _isRemote; getSVObservation() adds
+    ///        the additional PAIR evidence only when this is true
+    /// \param isForward stored as _isForward; selects fwReads vs rvReads when
+    ///        dopt.isStranded is set
+    void
+    reset(
+        const SimpleAlignment& local,
+        const SimpleAlignment& remote,
+        const bool isRemoteObserved,
+        const bool isForward) //Is the local read 1st in pair
+    {
+        _local= &local;
+        _remote= &remote;
+        _isRemote = isRemoteObserved;
+        _isForward = isForward;
+        _isScale = false;
+        _scale = 0;
+        _totalNonInsertSize = 0;
+        _localEndRefPos = 0;
+        _remoteEndRefPos = 0;
+    }
+
+    /// returns true if scale is valid (ie. the pair is anomalous)
+    ///
+    /// the scale is computed on the first call after reset() and cached for
+    /// subsequent calls
+    bool
+    computeLargeEventRegionScale()
+    {
+        assert(isInit());
+        if (! _isScale) setLargeEventRegionScale();
+        return (_scale >= 0.);
+    }
+
+    /// fill in \p sv with the evidence types, strand counts and breakend
+    /// states/regions implied by the current read pair
+    ///
+    /// requires that computeLargeEventRegionScale() has been called since the
+    /// last reset() and returned true (the scale must be within [0,1])
+    void
+    getSVObservation(
+        SVObservation& sv)
+    {
+        assert(_isScale);
+        assert((_scale >= 0.) && (_scale <= 1.));
+
+        using namespace SVEvidenceType;
+        static const index_t svLocalPair(LOCAL_PAIR);
+        static const index_t svPair(PAIR);
+
+        sv.evtype = svLocalPair;
+        sv.fragSource = FRAGSOURCE::PAIR;
+
+        SVBreakend& localBreakend(sv.bp1);
+        SVBreakend& remoteBreakend(sv.bp2);
+
+        localBreakend.lowresEvidence.add(svLocalPair);
+
+        if (_dopt.isStranded)
+        {
+            if (_isForward)
+            {
+                sv.fwReads++;
+            }
+            else
+            {
+                sv.rvReads++;
+            }
+        }
+        // when the remote read was observed, the remote breakend also receives
+        // LOCAL_PAIR evidence, both breakends receive PAIR evidence and the
+        // observation type is upgraded from LOCAL_PAIR to PAIR
+        if (_isRemote)
+        {
+            remoteBreakend.lowresEvidence.add(svLocalPair);
+            localBreakend.lowresEvidence.add(svPair);
+            remoteBreakend.lowresEvidence.add(svPair);
+            sv.evtype = svPair;
+        }
+
+        // set state and interval for each breakend:
+        //
+        // the maximum breakend region is a linear interpolation between the
+        // regular and the large-scale-event region sizes, weighted by _scale
+        const double breakendRegionMax(
+            (_scale*_rstats.largeScaleEventBreakendRegion.max) +
+            ((1.-_scale)*_rstats.breakendRegion.max));
+
+        // the region is shortened by the portion of the fragment already
+        // accounted for by the reads, but never below opt.minPairBreakendSize
+        const pos_t breakendSize(std::max(
+                                     static_cast<pos_t>(_opt.minPairBreakendSize),
+                                     static_cast<pos_t>(breakendRegionMax-_totalNonInsertSize)));
+
+        const pos_t localStartRefPos(localAlign().pos);
+        const pos_t remoteStartRefPos(remoteAlign().pos);
+
+        localBreakend.interval.tid = localAlign().tid;
+        // expected breakpoint range is from the end of the localRead alignment to the (probabilistic) end of the fragment:
+        if (localAlign().is_fwd_strand)
+        {
+            localBreakend.state = SVBreakendState::RIGHT_OPEN;
+            localBreakend.interval.range.set_begin_pos(_localEndRefPos);
+            localBreakend.interval.range.set_end_pos(_localEndRefPos + breakendSize);
+        }
+        else
+        {
+            localBreakend.state = SVBreakendState::LEFT_OPEN;
+            localBreakend.interval.range.set_end_pos(localStartRefPos);
+            localBreakend.interval.range.set_begin_pos(localStartRefPos - breakendSize);
+        }
+
+        // same rule as above, applied to the remote read alignment:
+        remoteBreakend.interval.tid = remoteAlign().tid;
+        if (remoteAlign().is_fwd_strand)
+        {
+            remoteBreakend.state = SVBreakendState::RIGHT_OPEN;
+            remoteBreakend.interval.range.set_begin_pos(_remoteEndRefPos);
+            remoteBreakend.interval.range.set_end_pos(_remoteEndRefPos + breakendSize);
+        }
+        else
+        {
+            remoteBreakend.state = SVBreakendState::LEFT_OPEN;
+            remoteBreakend.interval.range.set_end_pos(remoteStartRefPos);
+            remoteBreakend.interval.range.set_begin_pos(remoteStartRefPos - breakendSize);
+        }
+    }
+
+    /// return the amount of unaligned sequence preceding the pair insert:
+    ///
+    /// this is the trailing read size for a forward-strand alignment and the
+    /// leading read size otherwise
+    static
+    unsigned
+    distanceFromInsert(
+        const SimpleAlignment& al)
+    {
+        if (al.is_fwd_strand) return apath_read_trail_size(al.path);
+        else                  return apath_read_lead_size(al.path);
+    }
+
+private:
+
+    /// return the read length minus distanceFromInsert()
+    static
+    unsigned
+    getNonInsertSize(
+        const SimpleAlignment& al)
+    {
+        const unsigned readSize(apath_read_length(al.path));
+        return readSize - distanceFromInsert(al);
+    }
+
+    /// return the alignment start position plus the alignment's reference length
+    static
+    pos_t
+    getEndPos(
+        const SimpleAlignment& al)
+    {
+        return (al.pos + apath_ref_length(al.path));
+    }
+
+
+    /// compute and cache _scale, _totalNonInsertSize and both end positions
+    ///
+    /// _scale is left at 1.0 when the reads are on different chromosomes, on
+    /// the same strand, or form an 'outtie'. Otherwise it is taken from the
+    /// read group's largeEventRegionScaler for large fragments, or set to -1
+    /// (pair is not anomalous) when the fragment is not large.
+    void
+    setLargeEventRegionScale()
+    {
+        // different breakend sizes are used for long-range pairings vs short-ish range deletions,
+        // because of occasional long-fragment noise. This ramps from 0 to 1 as we go from short to
+        // long deletions sizes:
+        _isScale = true;
+        _scale = 1.0;
+
+        // find the read size excluding soft-clip/edge-insert on the 'inside' of the fragment
+        const unsigned localNoninsertSize(getNonInsertSize(localAlign()));
+        const unsigned remoteNoninsertSize(getNonInsertSize(remoteAlign()));
+
+        // total the 'used' read span of read1 and read2 (ie. the elements of the
+        // fragment that are not part of the insert between the reads)
+        //
+        _totalNonInsertSize = (localNoninsertSize+remoteNoninsertSize);
+
+        const pos_t localStartRefPos(localAlign().pos);
+        const pos_t remoteStartRefPos(remoteAlign().pos);
+        _localEndRefPos=getEndPos(localAlign());
+        _remoteEndRefPos=getEndPos(remoteAlign());
+
+        // check if fragment size is still anomalous after accounting for read alignment patterns:
+        // (pairs on different chromosomes or on the same strand keep the full scale of 1.0)
+        if ((localAlign().tid != remoteAlign().tid) ||
+            (localAlign().is_fwd_strand == remoteAlign().is_fwd_strand)) return;
+
+        // the insert runs from the end of the forward-strand read to the
+        // start of the reverse-strand read:
+        known_pos_range2 insertRange;
+        if (localAlign().is_fwd_strand)
+        {
+            insertRange.set_range(_localEndRefPos, remoteStartRefPos);
+        }
+        else
+        {
+            insertRange.set_range(_remoteEndRefPos, localStartRefPos);
+        }
+
+        // get length of fragment after accounting for any variants described directly in either read alignment:
+        // note insertRange can be negative, so don't use insertRange.size()
+        const pos_t cigarAdjustedFragmentSize(_totalNonInsertSize + (insertRange.end_pos() - insertRange.begin_pos()));
+
+        // this is an arbitrary point to start officially tagging 'outties' -- for now  we just want to avoid conventional small fragments from FFPE
+        const bool isOuttie(cigarAdjustedFragmentSize < 0);
+
+        if (isOuttie) return;
+
+        const bool isLargeFragment(cigarAdjustedFragmentSize > (_rstats.properPair.max + _opt.minCandidateVariantSize));
+
+        if (isLargeFragment)
+        {
+            _scale = _rstats.largeEventRegionScaler.getScale(cigarAdjustedFragmentSize);
+        }
+        else
+        {
+            // negative scale marks the pair as non-anomalous, see computeLargeEventRegionScale()
+            _scale = -1.;
+        }
+    }
+
+    /// true once reset() has provided a local alignment
+    bool
+    isInit() const
+    {
+        return (_local != nullptr);
+    }
+
+    const SimpleAlignment&
+    localAlign() const
+    {
+        return *_local;
+    }
+
+    const SimpleAlignment&
+    remoteAlign() const
+    {
+        return *_remote;
+    }
+
+    const ReadScannerOptions& _opt;
+    const ReadScannerDerivOptions& _dopt;
+    const SVLocusScanner::CachedReadGroupStats& _rstats;
+    const SimpleAlignment* _local = nullptr; // set by reset(), not owned
+    const SimpleAlignment* _remote = nullptr; // set by reset(), not owned
+    bool _isRemote = false; // was the remote read observed?
+    bool _isForward = false; // is the local read first in pair?
+    bool _isScale = false; // has _scale been computed since the last reset()?
+    double _scale = 0.; // negative value means the pair is not anomalous
+    unsigned _totalNonInsertSize = 0; // summed non-insert size of both reads
+    pos_t _localEndRefPos = 0; // local alignment start + reference length
+    pos_t _remoteEndRefPos = 0; // remote alignment start + reference length
+};
+
+
+
+/// Append an SV observation to \p candidates if the read pair is anomalous
+///
+/// \param remoteReadPtr the mate record if it is available, otherwise
+///        nullptr, in which case the mate alignment is approximated from
+///        \p localRead
+static
+void
+getSVCandidatesFromPair(
+    const ReadScannerOptions& opt,
+    const ReadScannerDerivOptions& dopt,
+    const SVLocusScanner::CachedReadGroupStats& rstats,
+    const bam_record& localRead,
+    const SimpleAlignment& localAlign,
+    const bam_record* remoteReadPtr,
+    std::vector<SVObservation>& candidates)
+{
+    // pair evidence requires a paired read with both mates mapped. SA-split
+    // supplementary reads are skipped so their paired end evidence is not
+    // counted twice:
+    const bool isPairEvidenceRead(
+        localRead.is_paired() &&
+        (! localRead.isNonStrictSupplement()) &&
+        (! localRead.is_unmapped()) &&
+        (! localRead.is_mate_unmapped()));
+    if (! isPairEvidenceRead) return;
+
+    // special case typically used for RNA-Seq analysis:
+    if (opt.isIgnoreAnomProperPair && localRead.is_proper_pair()) return;
+
+    // abstract remote alignment to SimpleAlignment object:
+    const bool isRemote(remoteReadPtr != nullptr);
+    const SimpleAlignment remoteAlign(isRemote ? getAlignment(*remoteReadPtr) : getFakeMateAlignment(localRead));
+
+    AlignmentPairAnalyzer pairAnalyzer(opt, dopt, rstats);
+    pairAnalyzer.reset(localAlign, remoteAlign, isRemote, localRead.is_first());
+
+    const bool isAnomalousPair(pairAnalyzer.computeLargeEventRegionScale());
+    if (! isAnomalousPair) return;
+
+    candidates.emplace_back();
+    pairAnalyzer.getSVObservation(candidates.back());
+
+#ifdef DEBUG_SCANNER
+    log_os << __FUNCTION__ << " evaluating pair sv for inclusion: " << candidates.back() << "\n";
+#endif
+}
+
+
+
+#if 0
+/// get SV candidates from shadow/singleton pairs
+/// look for singletons, create candidateSV around conf. interval of shadow position
+/// cache singletons? might be needed to remove poor quality shadows.
+/// should be able to re-use code, follow soft-clipping example.
+///
+/// NOTE(review): this function sits inside an '#if 0' region, so it is not
+/// compiled. As written it would not build if re-enabled:
+///  - lastMapq and lastQname are not declared anywhere in this function
+///  - the final GetSplitSVCandidate call passes 'opt' and no fragSource
+///    argument, which does not match the GetSplitSVCandidate signature
+///    defined earlier in this file
+static
+void
+getSVCandidatesFromShadow(
+    const ReadScannerOptions& opt,
+    const SVLocusScanner::CachedReadGroupStats& rstats,
+    const bam_record& localRead,
+    const SimpleAlignment& localAlign,
+    const bam_record* remoteReadPtr,
+    TrackedCandidates& candidates)
+{
+    using namespace SVEvidenceType;
+    static const index_t svSource(SHADOW);
+
+    static const bool isComplex(true);
+    pos_t singletonGenomePos(0);
+    int targetId(0);
+    if (NULL == remoteReadPtr)
+    {
+        if (!localRead.is_unmapped()) return;
+        // need to take care of this case
+        // need to rely on cached mapq and qname
+        // NOTE(review): the unconditional return below makes the rest of this
+        // branch unreachable
+        return;
+        if (!isGoodShadow(localRead,lastMapq,lastQname,opt.minSingletonMapqGraph))
+        {
+            return;
+        }
+        singletonGenomePos = localAlign.pos;
+        targetId           = localRead.target_id();
+    }
+    else
+    {
+        // have both reads, straightforward from here
+        const bam_record& remoteRead(*remoteReadPtr);
+        const SimpleAlignment remoteAlign(remoteRead);
+
+        if (localRead.is_mate_unmapped())
+        {
+            // remote read is shadow candidate
+            if (!isGoodShadow(remoteRead,localRead.map_qual(),localRead.qname(),opt.minSingletonMapqGraph))
+            {
+                return;
+            }
+            singletonGenomePos = localAlign.pos;
+            targetId = remoteRead.target_id();
+        }
+        else if (localRead.is_unmapped())
+        {
+            // local is shadow candidate
+            if (!isGoodShadow(localRead,remoteRead.map_qual(),remoteRead.qname(),opt.minSingletonMapqGraph))
+            {
+                return;
+            }
+            singletonGenomePos = remoteAlign.pos;
+            targetId = localRead.target_id();
+        }
+        else
+        {
+            // none unmapped, skip this one
+            return;
+        }
+    }
+    // place the shadow at the singleton position offset by the midpoint of
+    // the proper pair size range:
+    const pos_t properPairRangeOffset = static_cast<pos_t>(rstats.properPair.min + (rstats.properPair.max-rstats.properPair.min)/2);
+    const pos_t shadowGenomePos = singletonGenomePos + properPairRangeOffset;
+    candidates.push_back(GetSplitSVCandidate(opt,targetId,shadowGenomePos,shadowGenomePos, svSource, isComplex));
+}
+#endif
+
+
+
+/// Append all SV observations which can be derived from a single read
+/// record to \p candidates
+///
+/// Large indels in the read alignment are always processed. After that the
+/// read contributes either SA split evidence (if it is an SA-split read) or
+/// semi-aligned/soft-clip evidence (only when dopt.isSmallCandidates is
+/// set), but never both.
+static
+void
+getSingleReadSVCandidates(
+    const ReadScannerOptions& opt,
+    const ReadScannerDerivOptions& dopt,
+    const bam_record& localRead,
+    const SimpleAlignment& localAlign,
+    const chromMap_t& chromToIndex,
+    const reference_contig_segment& refSeq,
+    std::vector<SVObservation>& candidates)
+{
+    // every read is labeled read1 unless it is the second read of a pair:
+    FRAGSOURCE::index_t fragSource(FRAGSOURCE::READ1);
+    if (localRead.is_paired() && (localRead.read_no() == 2))
+    {
+        fragSource = FRAGSOURCE::READ2;
+    }
+
+    // - process any large indels in the localRead:
+    getSVCandidatesFromReadIndels(opt, dopt, localAlign, fragSource, candidates);
+#ifdef DEBUG_SCANNER
+    log_os << __FUNCTION__ << ": post-indels candidate_size: " << candidates.size() << "\n";
+#endif
+
+    // a read can provide SA split evidence or semi-aligned/soft-clip, but not both.
+    // this prevents split reads from triggering spurious local assembles. It is
+    // possible for a read to genuinely contain evidence of both, but this should
+    // be very rare.
+    if (! localRead.isSASplit())
+    {
+        if (dopt.isSmallCandidates)
+        {
+            getSVCandidatesFromSemiAligned(opt, dopt, localRead, localAlign, fragSource, refSeq,
+                                           candidates);
+        }
+#ifdef DEBUG_SCANNER
+        log_os << __FUNCTION__ << ": post-semialigned candidate_size: " << candidates.size() << "\n";
+#endif
+        return;
+    }
+
+    getSACandidatesFromRead(opt, dopt, localRead, localAlign, fragSource, chromToIndex,
+                            candidates);
+#ifdef DEBUG_SCANNER
+    log_os << __FUNCTION__ << ": post-split read candidate_size: " << candidates.size() << "\n";
+#endif
+}
+
+
+
+/// scan read record (and optionally its mate record) for SV evidence.
+//
+/// note that estimation is improved by the mate record (because we have the mate cigar string in this case)
+///
+static
+void
+getReadBreakendsImpl(
+    const ReadScannerOptions& opt,
+    const ReadScannerDerivOptions& dopt,
+    const SVLocusScanner::CachedReadGroupStats& rstats,
+    const bam_record& localRead,
+    const bam_record* remoteReadPtr,
+    const bam_header_info& bamHeader,
+    const reference_contig_segment& localRefSeq,
+    const reference_contig_segment* remoteRefSeqPtr,
+    std::vector<SVObservation>& candidates,
+    known_pos_range2& localEvidenceRange)
+{
+    using namespace illumina::common;
+
+#ifdef DEBUG_SCANNER
+    log_os << __FUNCTION__ << ": Starting read: " << localRead.qname() << "\n";
+#endif
+
+    const chromMap_t& chromToIndex(bamHeader.chrom_to_index);
+
+    candidates.clear();
+
+    /// get some basic derived information from the bam_record:
+    const SimpleAlignment localAlign(getAlignment(localRead));
+
+    try
+    {
+        getSingleReadSVCandidates(opt, dopt, localRead, localAlign, chromToIndex,
+                                  localRefSeq, candidates);
+
+        // run the same check on the read's mate if we have access to it
+        if (nullptr != remoteReadPtr)
+        {
+            const bam_record& remoteRead(*remoteReadPtr);
+            const SimpleAlignment remoteAlign(getAlignment(remoteRead));
+
+            if (nullptr == remoteRefSeqPtr)
+            {
+                static const char msg[] = "ERROR: remoteRefSeqPtr cannot be null";
+                BOOST_THROW_EXCEPTION(LogicException(msg));
+            }
+            getSingleReadSVCandidates(opt, dopt, remoteRead, remoteAlign,
+                                      chromToIndex, (*remoteRefSeqPtr),
+                                      candidates);
+        }
+
+        // process shadows:
+        //getSVCandidatesFromShadow(opt, rstats, localRead, localAlign,remoteReadPtr,candidates);
+
+        // - process anomalous read pairs:
+        getSVCandidatesFromPair(opt, dopt, rstats, localRead, localAlign, remoteReadPtr,
+                                candidates);
+    }
+    catch (...)
+    {
+        std::cerr << "ERROR: Exception caught while processing ";
+        if (nullptr == remoteReadPtr)
+        {
+            std::cerr << "single read record:\n"
+                      << '\t' << localRead << "\n";
+        }
+        else
+        {
+            std::cerr << " read pair records:\n"
+                      << '\t'  << localRead << "\n"
+                      << '\t' << (*remoteReadPtr) << "\n";
+        }
+        throw;
+    }
+
+#ifdef DEBUG_SCANNER
+    log_os << __FUNCTION__ << ": post-pair candidate_size: " << candidates.size() << "\n";
+#endif
+
+    // update localEvidence range:
+    // note this is only used if candidates were added, so there's no harm in setting it every time:
+    const unsigned localRefLength(apath_ref_length(localAlign.path));
+    const pos_t startRefPos(localRead.pos()-1);
+    const pos_t endRefPos(startRefPos+localRefLength);
+
+    localEvidenceRange.set_range(startRefPos,endRefPos);
+
+    const int maxTid(chromToIndex.size());
+
+    /// final chance to QC candidate set:
+    ///
+    for (const SVCandidate& sv : candidates)
+    {
+        bool isInvalidTid(false);
+        if ((sv.bp1.interval.tid < 0) || (sv.bp1.interval.tid >= maxTid))
+        {
+            isInvalidTid=true;
+        }
+        else if (sv.bp2.state != SVBreakendState::UNKNOWN)
+        {
+            if ((sv.bp2.interval.tid < 0) || (sv.bp2.interval.tid >= maxTid))
+            {
+                isInvalidTid=true;
+            }
+        }
+
+        bool isInvalidPos(false);
+        if (! isInvalidTid)
+        {
+            // note in the 'off-chromosome edge' test below we check for cases which are obviously way off
+            // the edge, but allow for a bit of over-edge mistakes to occur for the circular chromosomes
+            //
+            static const int offEdgePad(500);
+            const pos_t tid1Length(bamHeader.chrom_data[sv.bp1.interval.tid].length);
+            if ((sv.bp1.interval.range.end_pos() <= -offEdgePad) || (sv.bp1.interval.range.begin_pos() >= (tid1Length+offEdgePad)))
+            {
+                isInvalidPos=true;
+            }
+            else if (sv.bp2.state != SVBreakendState::UNKNOWN)
+            {
+                const pos_t tid2Length(bamHeader.chrom_data[sv.bp2.interval.tid].length);
+                if ((sv.bp2.interval.range.end_pos() <= -offEdgePad) || (sv.bp2.interval.range.begin_pos() >= (tid2Length+offEdgePad)))
+                {
+                    isInvalidPos=true;
+                }
+            }
+        }
+
+        if (isInvalidTid || isInvalidPos)
+        {
+            std::ostringstream oss;
+            if (isInvalidTid)
+            {
+                oss << "SVbreakend has unknown or invalid chromosome id in candidate sv.\n";
+            }
+            else
+            {
+                oss << "Cannot interpret BAM record: candidate SV breakend from BAM record is off chromosome edge.\n";
+            }
+
+            oss << "\tlocal_bam_record: " <<  localRead << "\n"
+                << "\tremote_bam record: ";
+            if (NULL==remoteReadPtr)
+            {
+                oss << "NONE";
+            }
+            else
+            {
+                oss << (*remoteReadPtr);
+            }
+            oss << "\n"
+                << "\tSVCandidate: " << sv << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+    }
+}
+
+
+
+/// Create an SVLocus for each potential SV event supported by the BAM record
+///
+/// the loci count should almost always be one (or, depending on input filtration, zero).
+/// multiple suggested loci from one read is more of a theoretical possibility than an
+/// expectation.
+///
+/// Only the single input read is examined here: the breakend search is run with no
+/// remote read and no remote reference segment.
+///
+/// \param opt read scanner options, forwarded to the breakend search
+/// \param dopt options derived from opt, forwarded to the breakend search
+/// \param rstats cached fragment size thresholds of the read's read group
+/// \param bamRead the read to convert into graph loci
+/// \param bamHeader header of the BAM file the read came from, forwarded to the breakend search
+/// \param refSeq reference segment for the read's region, forwarded to the breakend search
+/// \param loci output, cleared on entry and then filled with one locus per candidate
+/// \param eCounts evidence counters, incremented in place for every candidate
+///
+/// Throws LogicException if a candidate has an empty local breakend interval, or is not
+/// complex and has an empty remote breakend interval
+///
+static
+void
+getSVLociImpl(
+    const ReadScannerOptions& opt,
+    const ReadScannerDerivOptions& dopt,
+    const SVLocusScanner::CachedReadGroupStats& rstats,
+    const bam_record& bamRead,
+    const bam_header_info& bamHeader,
+    const reference_contig_segment& refSeq,
+    std::vector<SVLocus>& loci,
+    SampleEvidenceCounts& eCounts)
+{
+    using namespace illumina::common;
+
+    loci.clear();
+    std::vector<SVObservation> candidates;
+    known_pos_range2 localEvidenceRange;
+
+    // find all candidates from this read alone (both remote arguments are nullptr):
+    getReadBreakendsImpl(opt, dopt, rstats, bamRead, nullptr, bamHeader,
+                         refSeq, nullptr, candidates, localEvidenceRange);
+
+#ifdef DEBUG_SCANNER
+    log_os << __FUNCTION__ << ": candidate_size: " << candidates.size() << "\n";
+#endif
+
+    // translate SVCandidate to a simpler form for use
+    // in the SV locus graph:
+    for (const SVCandidate& cand : candidates)
+    {
+        const bool isCandComplex(isComplexSV(cand));
+
+        const SVBreakend& localBreakend(cand.bp1);
+        const SVBreakend& remoteBreakend(cand.bp2);
+
+        // the local breakend must always span a region, the remote breakend is only
+        // required to do so when the candidate is not complex:
+        if ((0==localBreakend.interval.range.size()) ||
+            ((! isCandComplex) && (0==remoteBreakend.interval.range.size())))
+        {
+            std::ostringstream oss;
+            oss << "Unexpected breakend pattern proposed from bam record.\n"
+                << "\tlocal_breakend: " << localBreakend << "\n"
+                << "\tremote_breakend: " << remoteBreakend << "\n"
+                << "\tbam_record: " << bamRead << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+
+        // update evidence stats:
+        // (only the local breakend's per-type evidence values are accumulated)
+        for (int i(0); i< SVEvidenceType::SIZE; ++i)
+        {
+            eCounts.eType[i] += localBreakend.lowresEvidence.getVal(i);
+        }
+
+        // determine the evidence weight of this candidate:
+        // both weights stay zero if the local breakend has neither non-pair nor local-pair counts
+        unsigned localEvidenceWeight(0);
+        unsigned remoteEvidenceWeight(0);
+
+        if (localBreakend.getAnyNonPairCount() != 0)
+        {
+            // non-pair evidence takes precedence over read pair evidence:
+            localEvidenceWeight = SVObservationWeights::internalReadEvent;
+            if (remoteBreakend.getAnyNonPairCount() != 0)
+            {
+                remoteEvidenceWeight = SVObservationWeights::internalReadEvent;
+            }
+        }
+        else if (localBreakend.getLocalPairCount() != 0)
+        {
+            // an innie pair with a template size below the 'distant' threshold is 'close',
+            // and receives the reduced closeReadPair weight:
+            bool isClose(false);
+            if (is_innie_pair(bamRead))
+            {
+                isClose = (std::abs(bamRead.template_size()) < rstats.minDistantFragmentSize);
+            }
+
+            unsigned thisWeight(SVObservationWeights::readPair);
+            if (isClose)
+            {
+                thisWeight = SVObservationWeights::closeReadPair;
+                eCounts.closeCount += 1;
+            }
+
+            localEvidenceWeight = thisWeight;
+            if (remoteBreakend.getLocalPairCount() != 0)
+            {
+                remoteEvidenceWeight = thisWeight;
+            }
+        }
+
+        // finally, create the graph locus:
+        SVLocus locus;
+        // set local breakend estimate:
+        const NodeIndexType localBreakendNode(locus.addNode(localBreakend.interval));
+        locus.setNodeEvidence(localBreakendNode,localEvidenceRange);
+
+        if (isCandComplex)
+        {
+            // complex candidates have no remote node, the local node is linked to itself:
+            locus.linkNodes(localBreakendNode,localBreakendNode,localEvidenceWeight);
+        }
+        else
+        {
+            // set remote breakend estimate:
+            const NodeIndexType remoteBreakendNode(locus.addNode(remoteBreakend.interval));
+            locus.linkNodes(localBreakendNode,remoteBreakendNode,localEvidenceWeight,remoteEvidenceWeight);
+
+            locus.mergeSelfOverlap();
+        }
+
+#ifdef DEBUG_SCANNER
+        log_os << __FUNCTION__ << ": adding Locus: " << locus << "\n";
+#endif
+        loci.push_back(locus);
+    }
+}
+
+
+
+/// Fill in one of the scanner's fragment size ranges from a symmetric pair of quantiles
+///
+/// \param fragStats fragment size distribution to query
+/// \param qprob probability of the lower bound, the upper bound is taken at (1-qprob)
+/// \param range output, min is clamped to be non-negative and max is asserted to be positive
+static
+void
+setRGRange(
+    const SizeDistribution& fragStats,
+    const float qprob,
+    SVLocusScanner::Range& range)
+{
+    const float upperProb(1-qprob);
+
+    range.min = fragStats.quantile(qprob);
+    range.max = fragStats.quantile(upperProb);
+
+    // never report a negative lower bound:
+    range.min = ((range.min<0.) ? 0 : range.min);
+
+    assert(range.max>0.);
+}
+
+
+
+/// Load the fragment size stats and cache the derived size thresholds of every read group
+///
+/// \param opt read scanner options, copied into the scanner
+/// \param statsFilename file holding the per read group fragment size stats
+/// \param isRNA forwarded to the derived options
+/// \param isStranded forwarded to the derived options
+///
+/// (the alignment filename list is currently unused)
+SVLocusScanner::
+SVLocusScanner(
+    const ReadScannerOptions& opt,
+    const std::string& statsFilename,
+    const std::vector<std::string>& /*alignmentFilename*/,
+    const bool isRNA,
+    const bool isStranded) :
+    _opt(opt),
+    _dopt(opt, isRNA, isStranded)
+{
+    using namespace illumina::common;
+
+    // read the fragment size stats from file:
+    _rss.load(statsFilename.c_str());
+
+    // derive and cache the commonly requested thresholds, one entry per read group:
+    const unsigned rgCount(_rss.size());
+    for (unsigned rgIndex(0); rgIndex<rgCount; ++rgIndex)
+    {
+        /// TODO: add check that the filenames in the stats file are a complete match to alignmentFilename
+
+        const SizeDistribution& rgDistro(getFragSizeDistro(rgIndex));
+
+        _stats.emplace_back();
+        CachedReadGroupStats& rgStats(_stats.back());
+
+        // quantile-based size ranges:
+        setRGRange(rgDistro, _opt.breakendEdgeTrimProb, rgStats.breakendRegion);
+        setRGRange(rgDistro, _opt.largeScaleEventBreakendEdgeTrimProb, rgStats.largeScaleEventBreakendRegion);
+        setRGRange(rgDistro, _opt.properPairTrimProb, rgStats.properPair);
+        setRGRange(rgDistro, _opt.evidenceTrimProb, rgStats.evidencePair);
+        setRGRange(rgDistro, 0.05f, rgStats.fifthPerc);
+
+        // track the widest 5th-95th percentile range over all read groups,
+        // the first read group always seeds both bounds:
+        const bool isFirstReadGroup(rgIndex==0);
+        if (isFirstReadGroup || (rgStats.fifthPerc.min < _fifthPerc.min))
+        {
+            _fifthPerc.min = rgStats.fifthPerc.min;
+        }
+        if (isFirstReadGroup || (rgStats.fifthPerc.max > _fifthPerc.max))
+        {
+            _fifthPerc.max = rgStats.fifthPerc.max;
+        }
+
+        const auto shadowQuantile(rgDistro.quantile(1-(_opt.shadowSearchRangeProb)));
+        rgStats.shadowSearchRange = shadowQuantile*_opt.shadowSearchRangeFactor;
+
+        assert(rgStats.shadowSearchRange > 0);
+
+        // all remaining thresholds are multiples of the proper pair upper bound:
+        const double properPairMax(rgStats.properPair.max);
+
+        rgStats.minVeryCloseFragmentSize = static_cast<int>(properPairMax*FragmentSizeType::maxNormalFactor);
+        rgStats.minCloseFragmentSize = static_cast<int>(properPairMax*FragmentSizeType::veryClosePairFactor);
+        rgStats.minDistantFragmentSize = static_cast<int>(properPairMax*FragmentSizeType::closePairFactor);
+
+        assert(rgStats.minDistantFragmentSize > properPairMax);
+
+        //rgStats.veryCloseEventScaler.init(rgStats.minVeryCloseFragmentSize, rgStats.minCloseFragmentSize);
+
+        const int largeEventRegionMin(static_cast<int>(properPairMax*FragmentSizeType::minLargeEventRegionFactor));
+        const int largeEventRegionMax(static_cast<int>(properPairMax*FragmentSizeType::maxLargeEventRegionFactor));
+
+        rgStats.largeEventRegionScaler.init(largeEventRegionMin, largeEventRegionMax);
+    }
+}
+
+
+
+/// Custom proper pair test: the read must be part of an innie pair, and its absolute template
+/// size must lie between the read group's proper pair minimum and 1.5x its proper pair maximum
+bool
+SVLocusScanner::
+isProperPair(
+    const bam_record& bamRead,
+    const unsigned defaultReadGroupIndex) const
+{
+    if (! is_innie_pair(bamRead)) return false;
+
+    const Range& ppr(_stats[defaultReadGroupIndex].properPair);
+    const int32_t fragmentSize(std::abs(bamRead.template_size()));
+
+    // we're seeing way too much large fragment garbage in cancers to use
+    // vanilla proper pair criteria, push the max fragment size out a bit for now:
+    static const float maxAnomFactor(1.5);
+    const int32_t maxProperSize(static_cast<int32_t>(maxAnomFactor*ppr.max));
+
+    const bool isTooLarge(fragmentSize > maxProperSize);
+    const bool isTooSmall(fragmentSize < ppr.min);
+
+    return (! (isTooLarge || isTooSmall));
+}
+
+
+
+/// Classify the fragment size of a read pair
+///
+/// pairs mapped to two different chromosomes are always DISTANT, all other pairs are
+/// classified from their absolute template size and the read group's cached thresholds
+FragmentSizeType::index_t
+SVLocusScanner::
+_getFragmentSizeType(
+    const bam_record& bamRead,
+    const unsigned defaultReadGroupIndex) const
+{
+    using namespace FragmentSizeType;
+
+    const bool isSameChrom(bamRead.target_id() == bamRead.mate_target_id());
+    if (! isSameChrom)
+    {
+        return DISTANT;
+    }
+
+    const int32_t fragmentSize(std::abs(bamRead.template_size()));
+    const CachedReadGroupStats& rgStats(_stats[defaultReadGroupIndex]);
+    return classifySize(rgStats, fragmentSize);
+}
+
+
+/// Test whether the read pair's fragment size category counts as large
+bool
+SVLocusScanner::
+_isLargeFragment(
+    const bam_record& bamRead,
+    const unsigned defaultReadGroupIndex) const
+{
+    const FragmentSizeType::index_t sizeType(_getFragmentSizeType(bamRead,defaultReadGroupIndex));
+    return FragmentSizeType::isLarge(sizeType);
+}
+
+
+
+/// Test whether a mapped read pair is anomalous for any reason besides being a short innie pair
+bool
+SVLocusScanner::
+isNonCompressedAnomalous(
+    const bam_record& bamRead,
+    const unsigned defaultReadGroupIndex) const
+{
+    // only mapped read pairs can be anomalous:
+    if (! is_mapped_pair(bamRead)) return false;
+
+    // proper pairs are never anomalous:
+    if (isProperPair(bamRead,defaultReadGroupIndex)) return false;
+
+    // any anomalous pair which is not an innie is accepted:
+    if (! is_innie_pair(bamRead)) return true;
+
+    // exclude innie read pairs which are anomalously short:
+    return _isLargeFragment(bamRead,defaultReadGroupIndex);
+}
+
+
+
+/// Test whether the alignment contains an insert or delete segment of at least
+/// the minimum candidate variant size
+bool
+SVLocusScanner::
+isLocalIndelEvidence(
+    const SimpleAlignment& bamAlign) const
+{
+    using namespace ALIGNPATH;
+    for (const path_segment& ps : bamAlign.path)
+    {
+        const bool isIndelSegment((ps.type == INSERT) || (ps.type == DELETE));
+        if (! isIndelSegment) continue;
+
+        if (ps.length>=_opt.minCandidateVariantSize)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+
+
+/// Test whether the leading or trailing mismatch length reported for the read reaches
+/// the minimum semi-aligned mismatch length
+bool
+SVLocusScanner::
+isSemiAlignedEvidence(
+    const bam_record& bamRead,
+    const SimpleAlignment& bamAlign,
+    const reference_contig_segment& refSeq) const
+{
+    unsigned leadingMismatchLen(0);
+    unsigned trailingMismatchLen(0);
+    getSVBreakendCandidateSemiAlignedSimple(bamRead, bamAlign, refSeq, _dopt.isUseOverlappingPairs,
+                                            leadingMismatchLen, trailingMismatchLen);
+
+    const bool isLeadingEvidence(leadingMismatchLen >= _opt.minSemiAlignedMismatchLen);
+    const bool isTrailingEvidence(trailingMismatchLen >= _opt.minSemiAlignedMismatchLen);
+    return (isLeadingEvidence || isTrailingEvidence);
+}
+
+
+
+/// Test whether the read shows large indel or semi-aligned evidence
+bool
+SVLocusScanner::
+isLocalAssemblyEvidence(
+    const bam_record& bamRead,
+    const reference_contig_segment& refSeq) const
+{
+    const SimpleAlignment bamAlign(getAlignment(bamRead));
+
+    /// TODO Add shadow evidence -- complexity here is keeping locus merging under control due to the large breakend location variance
+    /// suggested by shadows
+
+    // the semi-aligned test only runs when no indel evidence is found:
+    return (isLocalIndelEvidence(bamAlign) ||
+            isSemiAlignedEvidence(bamRead, bamAlign, refSeq));
+}
+
+
+
+/// Test whether the read provides any type of SV evidence
+///
+/// \param incountsPtr optional, if not null the per-type evidence counters it points to
+///                    are incremented for this read
+bool
+SVLocusScanner::
+isSVEvidence(
+    const bam_record& bamRead,
+    const unsigned defaultReadGroupIndex,
+    const reference_contig_segment& refSeq,
+    SVLocusEvidenceCount* incountsPtr) const
+{
+    // anomalous pair test, this excludes innie read pairs which are anomalously short:
+    const bool isAnom(isNonCompressedAnomalous(bamRead,defaultReadGroupIndex));
+    const bool isSplit(bamRead.isSASplit());
+
+    // refresh the cached alignment before running the alignment-based tests:
+    getAlignment(bamRead,_bamAlign);
+    const bool isIndel(isLocalIndelEvidence(_bamAlign));
+
+    // the semi-aligned test is only run for non-split reads, and only if small candidates are enabled:
+    bool isAssm(false);
+    if (_dopt.isSmallCandidates && (! isSplit))
+    {
+        isAssm = isSemiAlignedEvidence(bamRead, _bamAlign, refSeq);
+    }
+
+    const bool isEvidence(isAnom || isSplit || isIndel || isAssm);
+
+    // nothing to count without a counter object:
+    if (nullptr == incountsPtr) return isEvidence;
+
+    SVLocusEvidenceCount& incounts(*incountsPtr);
+    incounts.total++;
+    if (isAnom) incounts.anom++;
+    if (isSplit) incounts.split++;
+    if (isIndel) incounts.indel++;
+    if (isAssm) incounts.assm++;
+
+    if (! isEvidence) incounts.ignored++;
+
+    if (isAnom && isMateInsertionEvidenceCandidate(bamRead, getMinMapQ()))
+    {
+        // these counts are used to generate background noise rates in later candidate generation stages:
+        incounts.remoteRecoveryCandidates++;
+    }
+
+    return isEvidence;
+}
+
+
+
+/// Convert the read into zero or more SVLocus objects, using the cached stats of the
+/// given read group
+void
+SVLocusScanner::
+getSVLoci(
+    const bam_record& bamRead,
+    const unsigned defaultReadGroupIndex,
+    const bam_header_info& bamHeader,
+    const reference_contig_segment& refSeq,
+    std::vector<SVLocus>& loci,
+    SampleEvidenceCounts& eCounts) const
+{
+    // always start from an empty result:
+    loci.clear();
+
+    getSVLociImpl(_opt, _dopt, _stats[defaultReadGroupIndex], bamRead, bamHeader,
+                  refSeq, loci, eCounts);
+}
+
+
+
+/// Extract the SV candidates supported by a read and, if available, its mate, using the
+/// cached stats of the given read group
+void
+SVLocusScanner::
+getBreakendPair(
+    const bam_record& localRead,
+    const bam_record* remoteReadPtr,
+    const unsigned defaultReadGroupIndex,
+    const bam_header_info& bamHeader,
+    const reference_contig_segment& localRefSeq,
+    const reference_contig_segment* remoteRefSeqPtr,
+    std::vector<SVObservation>& candidates) const
+{
+    // the implementation reports a local evidence range which is not needed here:
+    known_pos_range2 unusedEvidenceRange;
+
+    getReadBreakendsImpl(_opt, _dopt, _stats[defaultReadGroupIndex],
+                         localRead, remoteReadPtr, bamHeader,
+                         localRefSeq, remoteRefSeqPtr,
+                         candidates, unusedEvidenceRange);
+}
diff --git a/src/c++/lib/manta/SVLocusScanner.hh b/src/c++/lib/manta/SVLocusScanner.hh
new file mode 100644
index 0000000..fb09bf8
--- /dev/null
+++ b/src/c++/lib/manta/SVLocusScanner.hh
@@ -0,0 +1,365 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+/// \author Ole Schulz-Trieglaff
+/// \author Bret Barnes
+///
+
+#pragma once
+
+#include "blt_util/LinearScaler.hh"
+#include "htsapi/bam_record.hh"
+#include "htsapi/bam_record_util.hh"
+#include "htsapi/bam_header_info.hh"
+#include "manta/ReadGroupStatsSet.hh"
+#include "manta/SVCandidate.hh"
+#include "manta/SVLocusEvidenceCount.hh"
+#include "svgraph/SVLocus.hh"
+#include "svgraph/SVLocusSampleCounts.hh"
+#include "options/ReadScannerOptions.hh"
+
+#include <string>
+#include <vector>
+
+
+/// Fragment size categories assigned to read pairs by the SV locus scanner
+///
+/// NOTE(review): the categories appear to be listed from smallest to largest fragment size,
+/// the size thresholds themselves are not defined in this header -- confirm against classifySize
+namespace FragmentSizeType
+{
+enum index_t
+{
+    COMPRESSED,
+    NORMAL,
+    VERYCLOSE,
+    CLOSE,
+    DISTANT     // also assigned to any pair mapped to two different chromosomes
+};
+}
+
+
+/// The counts in the SVLocus Graph represent an abstract weight of evidence supporting each edge/node.
+///
+/// To support large and small-scale evidence in a single graph, we need to allow for different weightings
+/// for different evidence types
+///
+/// All evidence weights are expressed relative to the 'observation' unit below.
+///
+struct SVObservationWeights
+{
+    // noise reduction:
+    static const unsigned observation = 3; ///< 'average' observation weight, this is used to scale noise filtration, but not for any evidence type
+
+    // input evidence:
+    static const unsigned readPair = observation; ///< anomalous read pair which is not classified as close
+    static const unsigned closeReadPair = 1; ///< innie read pair with a template size below the read group's minDistantFragmentSize
+    static const unsigned veryCloseReadPair = 1; ///< NOTE(review): presumably for pairs below minCloseFragmentSize -- confirm against usage
+    static const unsigned internalReadEvent = observation; ///< indels, soft-clip, etc.
+};
+
+
+
+/// Read scanner settings which are derived once from ReadScannerOptions and the run mode
+///
+struct ReadScannerDerivOptions
+{
+    /// \param opt user options from which the derived values are computed
+    /// \param isRNA enables the use of overlapping pairs
+    /// \param stranded copied to isStranded
+    ///
+    /// note afterBreakend is initialized from beforeBreakend, so beforeBreakend must stay
+    /// declared ahead of it in the member list below
+    ReadScannerDerivOptions(
+        const ReadScannerOptions& opt,
+        const bool isRNA,
+        const bool stranded) :
+        isSmallCandidates(opt.minCandidateVariantSize<=opt.maxCandidateSizeForLocalAssmEvidence),
+        beforeBreakend(opt.minPairBreakendSize/2),
+        afterBreakend(opt.minPairBreakendSize-beforeBreakend),
+        isUseOverlappingPairs(isRNA),
+        isStranded(stranded)
+    {}
+
+    /// true if the minimum candidate size is small enough for local assembly evidence to be used
+    const bool isSmallCandidates;
+
+    /// beforeBreakend and afterBreakend split minPairBreakendSize into two parts which sum to
+    /// the full size, afterBreakend receives the extra base when the size is odd
+    const pos_t beforeBreakend;
+    const pos_t afterBreakend;
+
+    /// TODO standardize the overlapping pair treatment to be the same for DNA/RNA modes, then
+    /// remove this bit:
+    const bool isUseOverlappingPairs;
+
+    const bool isStranded;
+};
+
+
+
+/// consolidate functions which process a read to determine its
+/// SV evidence value
+///
+/// In manta, evidence is scanned (at least) twice: once for SVLocus Graph generation
+/// and then once again during hygen/scoring. We need to make sure both of these steps
+/// are using the same logic to process read pairs into SV evidence. This class is
+/// responsible for the shared logic
+///
+struct SVLocusScanner
+{
+    /// \param opt read scanner options, copied into the scanner
+    /// \param statsFilename file holding the fragment size stats for each read group
+    /// \param alignmentFilename currently unused by the constructor
+    /// \param isRNA forwarded to the derived options
+    /// \param isStranded forwarded to the derived options
+    SVLocusScanner(
+        const ReadScannerOptions& opt,
+        const std::string& statsFilename,
+        const std::vector<std::string>& alignmentFilename,
+        const bool isRNA,
+        const bool isStranded = false);
+
+    /// this predicate runs isReadFiltered without the mapq components
+    ///
+    /// a read is filtered if it is marked as filtered or duplicate, or if it is a
+    /// supplementary or secondary read which is not an SA split read
+    static
+    bool
+    isReadFilteredCore(
+        const bam_record& bamRead)
+    {
+        if      (bamRead.is_filter()) return true;
+        else if (bamRead.is_dup()) return true;
+        // supplementary reads without SA tag
+        else if (bamRead.is_supplement() && (! bamRead.isSASplit())) return true;
+        else
+        {
+            // hack to work with bwamem '-M' formatting,
+            // keep secondary reads when they contain an SA tag
+            if (bamRead.is_secondary())
+            {
+                if (! bamRead.isSASplit()) return true;
+            }
+        }
+        return false;
+    }
+
+
+    /// same as isReadFilteredCore, but unmapped reads are filtered as well
+    static
+    bool
+    isMappedReadFilteredCore(
+        const bam_record& bamRead)
+    {
+        if (isReadFilteredCore(bamRead)) return true;
+        return (bamRead.is_unmapped());
+    }
+
+    /// this predicate runs any fast tests on the acceptability of a
+    /// read for the SVLocus build
+    /// Tests also for low mapq
+    bool
+    isReadFiltered(
+        const bam_record& bamRead) const
+    {
+        if      (isReadFilteredCore(bamRead)) return true;
+        else if (bamRead.map_qual() < _opt.minMapq) return true;
+        return false;
+    }
+
+    /// minimum mapping quality, as configured in the scanner options
+    unsigned
+    getMinMapQ() const
+    {
+        return _opt.minMapq;
+    }
+
+    /// minimum tier2 mapping quality, as configured in the scanner options
+    unsigned
+    getMinTier2MapQ() const
+    {
+        return _opt.minTier2Mapq;
+    }
+
+    /// custom version of proper pair bit test:
+    bool
+    isProperPair(
+        const bam_record& bamRead,
+        const unsigned defaultReadGroupIndex) const;
+
+    /// return true if the read pair is anomalous, for any anomaly type besides being a short innie read:
+    ///
+    /// according to this method nothing besides mapped read pairs can be anomalous, so all single read
+    /// anomalies (SA tags, CIGAR, semi-aligned) have to be detected elsewhere
+    bool
+    isNonCompressedAnomalous(
+        const bam_record& bamRead,
+        const unsigned defaultReadGroupIndex) const;
+
+    /// large indels in CIGAR string
+    bool
+    isLocalIndelEvidence(
+        const SimpleAlignment& bamAlign) const;
+
+    /// semi-aligned and soft-clipped edges
+    bool
+    isSemiAlignedEvidence(
+        const bam_record& bamRead,
+        const SimpleAlignment& bamAlign,
+        const reference_contig_segment& refSeq) const;
+
+    /// \brief is the read likely to indicate the presence of a small SV?
+    ///
+    /// this function flags reads which could contribute to a local small-variant assembly
+    /// but would not otherwise be caught by the proper pair function
+    ///
+    /// "small" here is relative -- it means any event at a size where read pair evidence will not be dominant
+    ///
+    /// Note that the thresholds in this function are more stringent than the equivalent scan used to
+    /// pick up reads prior to assembly -- in this case false positives could clog up the graph and
+    /// interfere with larger event discovery if not kept under control
+    bool
+    isLocalAssemblyEvidence(
+        const bam_record& bamRead,
+        const reference_contig_segment& refSeq) const;
+
+    /// return true if the read provides any type of SV evidence
+    ///
+    /// \param incountsPtr if not null, the evidence counts it points to are updated for this read
+    bool
+    isSVEvidence(
+        const bam_record& bamRead,
+        const unsigned defaultReadGroupIndex,
+        const reference_contig_segment& refSeq,
+        SVLocusEvidenceCount* incountsPtr = nullptr) const;
+
+    /// return zero to many SVLocus objects if the read supports any
+    /// structural variant(s) (detectable by manta)
+    ///
+    /// \param defaultReadGroupIndex the read group index to use in the absence of an RG tag
+    /// (for now RGs are ignored for the purpose of gathering insert stats)
+    ///
+    void
+    getSVLoci(
+        const bam_record& bamRead,
+        const unsigned defaultReadGroupIndex,
+        const bam_header_info& bamHeader,
+        const reference_contig_segment& refSeq,
+        std::vector<SVLocus>& loci,
+        SampleEvidenceCounts& eCounts) const;
+
+    /// get local and remote breakends for each SV Candidate which can be extracted from a read pair
+    ///
+    /// if remote read is not available, set remoteReadPtr to NULL and a best estimate will be generated for the remote breakend
+    ///
+    /// for all candidates, if one breakend is estimated from localRead and one is estimated from remoteRead, then
+    /// the local breakend will be placed in candidate.bp1 and the remote breakend will be placed in candidate.bp2
+    ///
+    void
+    getBreakendPair(
+        const bam_record& localRead,
+        const bam_record* remoteReadPtr,
+        const unsigned defaultReadGroupIndex,
+        const bam_header_info& bamHeader,
+        const reference_contig_segment& localRefSeq,
+        const reference_contig_segment* remoteRefSeqPtr,
+        std::vector<SVObservation>& candidates) const;
+
+    /// this information is needed for the whole bam, not just one read group:
+    ///
+    /// (returns the cached shadow search range of the given read group)
+    int
+    getShadowSearchRange(
+        const unsigned defaultReadGroupIndex) const
+    {
+        return _stats[defaultReadGroupIndex].shadowSearchRange;
+    }
+
+    /// provide direct access to the frag distro for
+    /// functions which can't be cached
+    ///
+    const SizeDistribution&
+    getFragSizeDistro(
+        const unsigned defaultReadGroupIndex) const
+    {
+        return _rss.getStats(defaultReadGroupIndex).fragStats;
+    }
+
+    /// simple [min,max] fragment size range
+    struct Range
+    {
+        double min = 0;
+        double max = 0;
+    };
+
+    /// evidence pair fragment size range cached for the given read group
+    const Range&
+    getEvidencePairRange(
+        const unsigned readGroupIndex) const
+    {
+        return _stats[readGroupIndex].evidencePair;
+    }
+
+    /// extreme 5th-95th percentile fragment size range over all read groups
+    const Range&
+    getExtremeFifthRange() const
+    {
+        return _fifthPerc;
+    }
+
+    /// fragment size thresholds precomputed once per read group
+    struct CachedReadGroupStats
+    {
+        /// fragment size range assumed for the purpose of creating SVLocusGraph regions
+        Range breakendRegion;
+
+        /// fragment size range assumed for the purpose of creating SVLocusGraph regions,
+        /// this range is used exclusively for large scale events (non-deletion or deletion above a threshold size):
+        Range largeScaleEventBreakendRegion;
+
+        /// fragment size range used to determine if a read is anomalous
+        Range properPair;
+
+        /// fragment size range computed from the evidenceTrimProb option
+        Range evidencePair;
+
+        /// range fixed to the 5th and 95th percentiles:
+        Range fifthPerc;
+
+        int shadowSearchRange = 0;
+
+        int minDistantFragmentSize = 0; ///< beyond the properPair anomalous threshold, there is a threshold to distinguish close and far pairs for the purpose of evidence weight
+        int minCloseFragmentSize = 0; ///< beyond the properPair anomalous threshold, there is a threshold to distinguish 'really-close' and 'close' pairs for the purpose of evidence weight
+        int minVeryCloseFragmentSize = 0;
+
+        //LinearScaler<int> veryCloseEventScaler; ///< used to scale down breakend size as fragments get smaller
+
+        LinearScaler<int> largeEventRegionScaler; ///< used to set expanded breakend sizes for large events
+    };
+
+    /// true if the scanner is configured to use overlapping read pairs
+    bool
+    isUseOverlappingPairs() const
+    {
+        return _dopt.isUseOverlappingPairs;
+    }
+
+private:
+
+    /// fragment sizes get sorted into several pre-defined categories:
+    ///
+    /// assumes a mapped read pair -- check this in code if making this method non-private
+    FragmentSizeType::index_t
+    _getFragmentSizeType(
+        const bam_record& bamRead,
+        const unsigned defaultReadGroupIndex) const;
+
+    /// test whether a fragment is significantly larger than expected
+    ///
+    /// this function is useful to eliminate reads which fail the ProperPair test
+    /// but are still very small
+    ///
+    /// assumes a mapped read pair -- check this in code if making this method non-private
+    bool
+    _isLargeFragment(
+        const bam_record& bamRead,
+        const unsigned defaultReadGroupIndex) const;
+
+    /////////////////////////////////////////////////
+    // data:
+    const ReadScannerOptions _opt; ///< copy of the user options
+    const ReadScannerDerivOptions _dopt; ///< values derived from _opt and the run mode
+    ReadGroupStatsSet _rss; ///< fragment size stats loaded from the stats file
+
+    /// cached thresholds, one entry per read group in _rss:
+    std::vector<CachedReadGroupStats> _stats;
+
+    /// extreme 5th-95th percentiles over all read groups:
+    Range _fifthPerc;
+
+    // cached temporary to reduce syscalls:
+    //
+    // NOTE(review): this member is overwritten by the const method isSVEvidence, so concurrent
+    // isSVEvidence calls on one scanner object would presumably race on it -- confirm that
+    // callers do not share a scanner across threads
+    mutable SimpleAlignment _bamAlign;
+
+//    std::string lastQname;
+//    uint8_t lastMapq;
+};
+
diff --git a/src/c++/lib/manta/SVLocusScannerSemiAligned.cpp b/src/c++/lib/manta/SVLocusScannerSemiAligned.cpp
new file mode 100644
index 0000000..23f177d
--- /dev/null
+++ b/src/c++/lib/manta/SVLocusScannerSemiAligned.cpp
@@ -0,0 +1,340 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+/// \author Ole Schulz-Trieglaff
+/// \author Bret Barnes
+/// \author Felix Schlesinger
+///
+
+#include "alignment/ReadScorer.hh"
+
+#include "common/Exceptions.hh"
+#include "manta/SVLocusScannerSemiAligned.hh"
+
+#include "boost/foreach.hpp"
+
+
+//#define DEBUG_SEMI_ALIGNED
+
+#ifdef DEBUG_SEMI_ALIGNED
+#include "blt_util/log.hh"
+#include <iostream>
+#endif
+
+
+
+/// In the general case we want 'N's to be counted as mismatches, but for
+/// the semi-aligned metric, we don't want N's to nominate read segments as
+/// SV-associated because of a string of Ns, so we treat these as matches
+///
+static
+bool
+isSemiAlignedBaseMatch(
+    const char a,
+    const char b)
+{
+    if ((a=='N') || (b=='N')) return true; // an 'N' on either side acts as a wildcard (uppercase 'N' only)
+    return (a==b); // otherwise an exact, case-sensitive character comparison
+}
+
+
+
+/// report the read length from position 0 to immediately before the first run of
+/// contiguousMatchCount matching bases (as judged by isSemiAlignedBaseMatch); if no such
+/// run is found, the read and reference positions reached at the end of the path are reported
+static
+void
+leadingEdgeMismatchLength(
+    const SimpleAlignment& bamAlign,
+    const bam_seq& querySeq,
+    const reference_contig_segment& refSeq,
+    const unsigned contiguousMatchCount,
+    unsigned& leadingLength,
+    pos_t& leadingRefPos)
+{
+    using namespace ALIGNPATH;
+
+    assert(contiguousMatchCount != 0);
+
+    pos_t readIndex(0); // read offset of the first base of the current path segment
+    pos_t refIndex(bamAlign.pos); // reference position of the first base of the current path segment
+
+    leadingLength=0;
+    leadingRefPos=refIndex;
+
+    unsigned matchLength(0); // length of the current run of consecutive matching bases
+    for (const path_segment& ps : bamAlign.path)
+    {
+        if (is_segment_align_match(ps.type))
+        {
+            for (unsigned segPos(0); segPos<ps.length; ++segPos)
+            {
+                if (isSemiAlignedBaseMatch(querySeq.get_char(readIndex+segPos), refSeq.get_base(refIndex+segPos)))
+                {
+                    matchLength++;
+
+                    if (matchLength>=contiguousMatchCount)
+                    {
+                        leadingLength=(readIndex+segPos)-(matchLength-1); // back up to the first base of the matching run
+                        leadingRefPos=(refIndex+segPos)-(matchLength-1); // same back-up in reference coordinates
+                        return;
+                    }
+                }
+                else
+                {
+                    matchLength=0; // a mismatch restarts the run
+                }
+            }
+        }
+        else if (is_segment_type_indel(ps.type))
+        {
+            matchLength = 0; // an indel segment also breaks the run of contiguous matches
+        }
+
+        if (is_segment_type_read_length(ps.type)) readIndex += ps.length; // advance past segments that consume read bases
+        if (is_segment_type_ref_length(ps.type)) refIndex += ps.length; // advance past segments that consume reference bases
+    }
+
+    leadingLength=readIndex; // no qualifying run found: report everything traversed
+    leadingRefPos=refIndex;
+}
+
+
+
+static
+void
+trailingEdgeMismatchLength(
+    const SimpleAlignment& bamAlign,
+    const bam_seq& querySeq,
+    const reference_contig_segment& refSeq,
+    const unsigned contiguousMatchCount,
+    unsigned& trailingLength,
+    pos_t& trailingRefPos)
+{
+    using namespace ALIGNPATH;
+    // mirror image of leadingEdgeMismatchLength: walks the path backwards from the last read base
+    assert(contiguousMatchCount != 0);
+
+    const pos_t readSize(querySeq.size());
+
+    pos_t readIndex(readSize-1); // read offset of the last base of the current path segment
+    pos_t refIndex(bamAlign.pos + apath_ref_length(bamAlign.path)-1); // reference position of the last aligned base
+
+    unsigned matchLength(0); // length of the current run of consecutive matching bases
+    BOOST_REVERSE_FOREACH(const path_segment& ps, bamAlign.path)
+    {
+        if (is_segment_align_match(ps.type))
+        {
+            for (unsigned segPos(0); segPos<ps.length; ++segPos)
+            {
+                if (isSemiAlignedBaseMatch(querySeq.get_char(readIndex-segPos), refSeq.get_base(refIndex-segPos)))
+                {
+                    matchLength++;
+
+                    if (matchLength>=contiguousMatchCount)
+                    {
+                        trailingLength=(readSize-(readIndex-segPos))-matchLength; // read bases lying after the matching run
+                        trailingRefPos=(refIndex-segPos)+matchLength; // reference position immediately after the matching run
+                        return;
+                    }
+                }
+                else
+                {
+                    matchLength=0; // a mismatch restarts the run
+                }
+            }
+        }
+        else if (is_segment_type_indel(ps.type))
+        {
+            matchLength = 0; // an indel segment also breaks the run of contiguous matches
+        }
+
+        if (is_segment_type_read_length(ps.type)) readIndex -= ps.length; // step back over segments that consume read bases
+        if (is_segment_type_ref_length(ps.type)) refIndex -= ps.length; // step back over segments that consume reference bases
+    }
+
+    trailingLength=readSize-(readIndex+1); // no qualifying run found: report everything traversed
+    trailingRefPos=refIndex+1;
+}
+
+
+
+/// report the length from 0 to immediately before the indicated number of
+/// contiguous matches moving inwards from each end of the read
+///
+/// \param[out] leadingLength semi-aligned length in read coordinates from the start of the read
+/// \param[out] leadingRefPos reference position of read start after removing semi-aligned portion from consideration
+///
+/// \param[out] trailingLength, trailingRefPos the analogous values measured from the end of the read
+///
+static
+void
+edgeMismatchLength(
+    const SimpleAlignment& bamAlign,
+    const bam_seq& querySeq,
+    const reference_contig_segment& refSeq,
+    const unsigned contiguousMatchCount,
+    unsigned& leadingLength,
+    pos_t& leadingRefPos,
+    unsigned& trailingLength,
+    pos_t& trailingRefPos)
+{
+    leadingEdgeMismatchLength(bamAlign,querySeq,refSeq,contiguousMatchCount,leadingLength,leadingRefPos);
+    trailingEdgeMismatchLength(bamAlign,querySeq,refSeq,contiguousMatchCount,trailingLength,trailingRefPos);
+
+    const unsigned readSize(querySeq.size());
+    assert(leadingLength<=readSize); // sanity check: neither edge may be longer than the read itself
+    assert(trailingLength<=readSize);
+}
+
+
+
+void
+getSVBreakendCandidateSemiAligned(
+    const bam_record& bamRead,
+    const SimpleAlignment& bamAlign,
+    const reference_contig_segment& refSeq,
+    const bool isUseOverlappingPairs,
+    unsigned& leadingMismatchLen,
+    pos_t& leadingRefPos,
+    unsigned& trailingMismatchLen,
+    pos_t& trailingRefPos,
+    const uint8_t minQ,
+    const float minQFrac)
+{
+    static const unsigned contiguousMatchCount(5); // run of matching bases that ends an edge's semi-aligned section
+
+    leadingMismatchLen = 0; // all outputs default to "no semi-aligned edge found"
+    leadingRefPos = 0;
+    trailingMismatchLen = 0;
+    trailingRefPos = 0;
+
+    if (is_possible_adapter_pair(bamRead)) return; // nothing is reported for possible adapter pairs
+
+    // create a new alignment with all soft-clip sections forced to match:
+    const SimpleAlignment matchedAlignment(matchifyEdgeSoftClip(bamAlign));
+
+    if (! isUseOverlappingPairs)
+    {
+        if (is_overlapping_pair(bamRead, matchedAlignment)) return; // overlapping pairs are skipped entirely in this mode
+    }
+
+    using namespace ALIGNPATH;
+    const bam_seq querySeq(bamRead.get_bam_read());
+
+    const uint8_t* qual(bamRead.qual());
+    const unsigned readSize(bamRead.read_size());
+
+    unsigned leadingMismatchLenTmp(0); // unfiltered edge lengths; only copied to the outputs if the quality test passes
+    unsigned trailingMismatchLenTmp(0);
+    edgeMismatchLength(matchedAlignment, querySeq, refSeq, contiguousMatchCount,
+                       leadingMismatchLenTmp, leadingRefPos,
+                       trailingMismatchLenTmp, trailingRefPos); // note: the ref pos outputs are written here regardless of later filtering
+
+    if ((leadingMismatchLenTmp + trailingMismatchLenTmp) >= readSize) return; // edges cover the whole read: report zero lengths
+
+    if (0 != leadingMismatchLenTmp)
+    {
+        if (bamRead.is_fwd_strand() || (!is_overlapping_pair(bamRead, matchedAlignment))) // for overlapping pairs only fwd-strand reads report a leading edge
+        {
+            unsigned minQCount(0); // number of bases in the leading section with qual >= minQ
+            for (unsigned pos(0); pos<leadingMismatchLenTmp; ++pos)
+            {
+                if (qual[pos] >= minQ) ++minQCount;
+            }
+            if ((static_cast<float>(minQCount)/(leadingMismatchLenTmp)) >= minQFrac) // require enough high-quality bases in the section
+            {
+                leadingMismatchLen = leadingMismatchLenTmp;
+            }
+        }
+#ifdef DEBUG_SEMI_ALIGNED
+        else
+            log_os << " Overlapping_pair leading" << " read qname=" << bamRead.qname() << std::endl;
+#endif
+    }
+
+    if (0 != trailingMismatchLenTmp)
+    {
+        if ((!bamRead.is_fwd_strand()) || (!is_overlapping_pair(bamRead, matchedAlignment))) // for overlapping pairs only rev-strand reads report a trailing edge
+        {
+            unsigned minQCount(0); // number of bases in the trailing section with qual >= minQ
+            for (unsigned pos(0); pos<trailingMismatchLenTmp; ++pos)
+            {
+                if (qual[readSize-pos-1] >= minQ) ++minQCount; // index backwards from the last base of the read
+            }
+            if ((static_cast<float>(minQCount)/(trailingMismatchLenTmp)) >= minQFrac) // require enough high-quality bases in the section
+            {
+                trailingMismatchLen = trailingMismatchLenTmp;
+            }
+        }
+#ifdef DEBUG_SEMI_ALIGNED
+        else
+            log_os << "Overlapping_pair trailing" << " read qname=" << bamRead.qname() << std::endl;
+#endif
+    }
+}
+
+
+#if 0
+// TODO: pass iterator instead of ref substring
+bool
+isSemiAligned(
+    const bam_record& bamRead,
+    const std::string& refSeq,
+    const double minSemiAlignedScore)
+{
+    // read cannot be semi-aligned if unmapped
+    if (bamRead.is_unmapped()) return false;
+
+    ALIGNPATH::path_t apath;
+    bam_cigar_to_apath(bamRead.raw_cigar(),bamRead.n_cigar(),apath);
+
+    // soft-clipped reads, not looked at here
+    /*if (refSeq.size() != qrySeq.size())
+    {
+        std::cerr << "Skip because of bad ref length." << std::endl;
+        return false;
+    }*/
+
+    const std::string qrySeq(bamRead.get_bam_read().get_string());
+    //std::cerr << "qrySeq = " << qrySeq << std::endl;
+    //std::cerr << "refSeq = " << refSeq << std::endl;
+    apath_add_seqmatch(qrySeq.begin(), qrySeq.end(),
+                       refSeq.begin(), refSeq.end(),
+                       apath);
+    //std::cerr << "apath = " << apath << std::endl;
+
+    const double semiAlignedScore(ReadScorer::getSemiAlignedMetric(bamRead.read_size(),apath,bamRead.qual()));
+    //std::cerr << " semi-aligned score=" << semiAlignedScore << "\n";
+#ifdef DEBUG_SEMI_ALIGNED
+    static const std::string logtag("isSemiAligned");
+    log_os << logtag << " semi-aligned score=" << semiAlignedScore << " read qname=" << bamRead.qname() << " apath=" << apath <<  std::endl;
+#endif
+    //std::cerr << " semi-aligned score=" << semiAlignedScore << " read qname=" << bamRead.qname() << " apath=" << apath <<  std::endl;
+    //if (semiAlignedScore>minSemiAlignedScore) {
+    //std::cerr << "SEMI-ALIGNED" << std::endl;
+    //} else {
+    //std::cerr << "NOT SEMI-ALIGNED" << std::endl;
+    //}
+    return (semiAlignedScore>minSemiAlignedScore);
+}
+#endif
diff --git a/src/c++/lib/manta/SVLocusScannerSemiAligned.hh b/src/c++/lib/manta/SVLocusScannerSemiAligned.hh
new file mode 100644
index 0000000..f9b0af8
--- /dev/null
+++ b/src/c++/lib/manta/SVLocusScannerSemiAligned.hh
@@ -0,0 +1,75 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+/// \author Ole Schulz-Trieglaff
+/// \author Bret Barnes
+/// \author Felix Schlesinger
+///
+
+#pragma once
+
+#include "blt_util/blt_types.hh"
+#include "blt_util/reference_contig_segment.hh"
+#include "blt_util/SimpleAlignment.hh"
+#include "htsapi/bam_record_util.hh"
+
+
+/// searches for poorly aligned read ends
+///
+/// search is based on high-quality mismatches, any soft-clipped sections
+/// will be realigned before searching for semi-aligned sections
+///
+/// \param[in] minQ minimum base quality for a base to count towards minQFrac
+/// \param[in] minQFrac this fraction of bases must have qual>=minQ within the clipped region
+///
+void
+getSVBreakendCandidateSemiAligned(
+    const bam_record& bamRead,
+    const SimpleAlignment& bamAlign,
+    const reference_contig_segment& refSeq,
+    const bool isUseOverlappingPairs,
+    unsigned& leadingMismatchLen,
+    pos_t& leadingRefPos,
+    unsigned& trailingMismatchLen,
+    pos_t& trailingRefPos,
+    const uint8_t minQ = 20,
+    const float minQFrac = 0.75);
+
+
+/// simplified interface to the full function above: reports only the two edge lengths, using the default minQ/minQFrac
+inline
+void
+getSVBreakendCandidateSemiAlignedSimple(
+    const bam_record& bamRead,
+    const SimpleAlignment& bamAlign,
+    const reference_contig_segment& refSeq,
+    const bool isUseOverlappingPairs,
+    unsigned& leadingMismatchLen,
+    unsigned& trailingMismatchLen)
+{
+    pos_t leadingRefPos(0), trailingRefPos(0); // reference positions are computed by the full function but discarded here
+    getSVBreakendCandidateSemiAligned(
+        bamRead, bamAlign, refSeq,
+        isUseOverlappingPairs,
+        leadingMismatchLen, leadingRefPos,
+        trailingMismatchLen, trailingRefPos);
+}
diff --git a/src/c++/lib/manta/SVModelScoreInfo.hh b/src/c++/lib/manta/SVModelScoreInfo.hh
new file mode 100644
index 0000000..a7c30af
--- /dev/null
+++ b/src/c++/lib/manta/SVModelScoreInfo.hh
@@ -0,0 +1,60 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include "manta/SVScoreInfo.hh"
+#include "manta/SVScoreInfoDiploid.hh"
+#include "manta/SVScoreInfoSomatic.hh"
+#include "manta/SVScoreInfoTumor.hh"
+
+
+/// all scoring info for one sv candidate, including data related to specific scoring models
+///
+/// aggregates the model-independent scores (base) with the diploid, somatic and tumor model scores
+struct SVModelScoreInfo
+{
+    void
+    setSampleCount(
+        const unsigned sampleCount,
+        const unsigned diploidSampleCount)
+    {
+        base.setSampleCount(sampleCount); // only base and diploid are given a sample count here
+        diploid.setSampleCount(diploidSampleCount);
+    }
+
+    void
+    clear()
+    {
+        base.clear(); // forwards clear() to every member
+        diploid.clear();
+        somatic.clear();
+        tumor.clear();
+    }
+
+    SVScoreInfo base;
+    SVScoreInfoDiploid diploid;
+    SVScoreInfoSomatic somatic;
+    SVScoreInfoTumor tumor;
+};
diff --git a/src/c++/lib/manta/SVMultiJunctionCandidate.cpp b/src/c++/lib/manta/SVMultiJunctionCandidate.cpp
new file mode 100644
index 0000000..b5721be
--- /dev/null
+++ b/src/c++/lib/manta/SVMultiJunctionCandidate.cpp
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/SVMultiJunctionCandidate.hh"
+
+#include <iostream>
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVMultiJunctionCandidate& scc)
+{
+    static const char indent('\t');
+    os << "SVComplexCandidate:\n" // note: the printed label differs from the type name SVMultiJunctionCandidate
+       << indent << "total_breakend_junctions: " << scc.junction.size() << "\n";
+
+    for (const SVCandidate& sv : scc.junction) // then stream each junction's SVCandidate in order
+    {
+        os << sv;
+    }
+
+    return os;
+}
diff --git a/src/c++/lib/manta/SVMultiJunctionCandidate.hh b/src/c++/lib/manta/SVMultiJunctionCandidate.hh
new file mode 100644
index 0000000..95e6254
--- /dev/null
+++ b/src/c++/lib/manta/SVMultiJunctionCandidate.hh
@@ -0,0 +1,51 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVCandidate.hh"
+
+
+/// SVMultiJunctionCandidate (printed as "SVComplexCandidate" by operator<<) represents an associated grouping of multiple breakend pairs
+///
+/// Example: The two breakend pairs of a simple inversion would form a complex candidate composed of the two SVCandidates for the
+/// forward and reverse junctions.
+///
+/// for a complex candidate we want to test the concept that the full set of breakends occurred together as part of the same
+/// event, and thus be able to call the full event with perhaps less evidence per each single breakend than would be acceptable
+/// during regular calling.
+///
+struct SVMultiJunctionCandidate
+{
+    SVMultiJunctionCandidate()
+    {}
+
+    std::vector<SVCandidate> junction; ///< one SVCandidate per junction in the group
+
+    /// TODO: need to design a quick data structure to iterate through complex event breakend regions and pull out the associated candidate's
+    /// breakends -- is this just a mapping from MJ breakend 'groups' to literal junction breakends??
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVMultiJunctionCandidate& svc);
diff --git a/src/c++/lib/manta/SVMultiJunctionCandidateUtil.hh b/src/c++/lib/manta/SVMultiJunctionCandidateUtil.hh
new file mode 100644
index 0000000..df6d686
--- /dev/null
+++ b/src/c++/lib/manta/SVMultiJunctionCandidateUtil.hh
@@ -0,0 +1,40 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVMultiJunctionCandidate.hh"
+#include "manta/SVCandidateUtil.hh"
+
+/// complex in this case means that we have no specific hypothesis for the SV --
+/// it is just a single genomic region for which we schedule local assembly
+///
+inline
+bool
+isComplexSV(const SVMultiJunctionCandidate& mjSV)
+{
+    if (mjSV.junction.size() != 1) return false; // only a single-junction candidate can be complex
+
+    return isComplexSV(mjSV.junction[0]); // defer to the SVCandidate overload (presumably from SVCandidateUtil.hh)
+}
diff --git a/src/c++/lib/manta/SVReferenceUtil.cpp b/src/c++/lib/manta/SVReferenceUtil.cpp
new file mode 100644
index 0000000..d58330f
--- /dev/null
+++ b/src/c++/lib/manta/SVReferenceUtil.cpp
@@ -0,0 +1,238 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "common/Exceptions.hh"
+#include "htsapi/samtools_fasta_util.hh"
+#include "manta/SVReferenceUtil.hh"
+
+
+
+#if 0
+static
+void
+trimOverlappingRange(
+    known_pos_range2& rA,
+    known_pos_range2& rB)
+{
+    // put ranges in order:
+    known_pos_range2* r1(&rA);
+    known_pos_range2* r2(&rB);
+
+    if (r1->begin_pos() > r2->begin_pos()) std::swap(r1, r2);
+
+    const pos_t overlap(r1->end_pos()-r2->begin_pos());
+    if (overlap <= 0) return;
+
+    r1->set_end_pos(r1->end_pos()-(overlap/2));
+    r2->set_begin_pos(r1->end_pos());
+}
+#endif
+
+
+
+/// produce the reference extraction interval only: bpInterval padded by extraRefEdgeSize on both sides and clipped to [0,chromSize)
+///
+/// \param[out] leadingTrim number of requested bases clipped from the front of the padded interval (0 if none)
+/// \param[out] trailingTrim number of requested bases clipped from the back of the padded interval (0 if none)
+/// throws LogicException if bpInterval does not overlap the chromosome at all
+static
+void
+getBpReferenceInterval(
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const GenomeInterval& bpInterval,
+    GenomeInterval& refInterval,
+    unsigned& leadingTrim,
+    unsigned& trailingTrim)
+{
+    const bam_header_info::chrom_info& chromInfo(header.chrom_data[bpInterval.tid]); // NOTE(review): tid is not range-checked here
+
+    const pos_t chromSize(static_cast<pos_t>(chromInfo.length));
+
+    assert(bpInterval.range.begin_pos() <= bpInterval.range.end_pos());
+    if ((bpInterval.range.begin_pos() >= chromSize) || (bpInterval.range.end_pos() <= 0)) // interval lies entirely off the chromosome
+    {
+        using namespace illumina::common;
+
+        std::ostringstream oss;
+        oss << __FUNCTION__ << ": requested reference range has no overlap with chromosome\n"
+            << "\tinterval: " << bpInterval
+            << "\tchromSize: " << chromSize
+            << "\n";
+
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    pos_t beginPos(bpInterval.range.begin_pos()-extraRefEdgeSize); // padded start, may be negative before clipping
+    if (beginPos < 0)
+    {
+        leadingTrim = -beginPos; // amount of padded sequence lost before position 0
+        beginPos = 0;
+    }
+    else
+    {
+        leadingTrim = 0;
+    }
+
+    pos_t endPos(bpInterval.range.end_pos()+extraRefEdgeSize); // padded end, may exceed chromSize before clipping
+    if (endPos > chromSize)
+    {
+        trailingTrim = (endPos - chromSize); // amount of padded sequence lost past the chromosome end
+        endPos = chromSize;
+    }
+    else
+    {
+        trailingTrim = 0;
+    }
+
+    refInterval.tid = bpInterval.tid;
+    refInterval.range.set_begin_pos(beginPos);
+    refInterval.range.set_end_pos(endPos);
+}
+
+
+
+/// simpler calling convention which throws away trim values (same clipping and exception behavior as the full version)
+static
+void
+getBpReferenceInterval(
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const GenomeInterval& bpInterval,
+    GenomeInterval& refInterval)
+{
+    unsigned leadingTrim; // both are always assigned by the callee, then discarded
+    unsigned trailingTrim;
+    getBpReferenceInterval(header, extraRefEdgeSize, bpInterval, refInterval, leadingTrim, trailingTrim);
+}
+
+
+
+/// given a reference extraction interval, produce the corresponding ref contig segment; throws LogicException on a size mismatch
+static
+void
+getIntervalReferenceSegment(
+    const std::string& referenceFilename,
+    const bam_header_info& header,
+    const GenomeInterval& refInterval,
+    reference_contig_segment& intervalRefSeq)
+{
+    const bam_header_info::chrom_info& chromInfo(header.chrom_data[refInterval.tid]);
+    const std::string& chrom(chromInfo.label);
+
+    // get REF
+    const known_pos_range2& range(refInterval.range);
+    intervalRefSeq.set_offset(range.begin_pos()); // segment coordinates are offset to the interval start
+
+    // note: begin and end pos follow Manta's closed-open bpInterval conventions (a la bedtools),
+    // but the ref function below takes closed-closed endpoints, so we subtract one from endPos
+    get_standardized_region_seq(referenceFilename, chrom, range.begin_pos(), (range.end_pos()-1), intervalRefSeq.seq());
+
+    if (intervalRefSeq.seq().size() != range.size()) // the fetched sequence must exactly cover the requested range
+    {
+        using namespace illumina::common;
+
+        std::ostringstream oss;
+        oss << "getIntervalReferenceSegment: Unexpected reference sequence\n"
+            << "\t" << referenceFilename
+            << "\t" << chrom
+            << "\t" << range
+            << "\n";
+
+        oss << "\texpected_size: " << range.size()
+            << "\treturned_size: " << intervalRefSeq.seq().size()
+            << "\n";
+
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+}
+
+
+
+void
+getIntervalReferenceSegment(
+    const std::string& referenceFilename,
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const GenomeInterval& bpInterval,
+    reference_contig_segment& intervalRefSeq,
+    unsigned& leadingTrim,
+    unsigned& trailingTrim)
+{
+    GenomeInterval refInterval; // bpInterval after padding by extraRefEdgeSize and clipping to the chromosome
+    getBpReferenceInterval(header, extraRefEdgeSize, bpInterval, refInterval, leadingTrim, trailingTrim);
+    // fetch the sequence for the padded/clipped interval computed above
+    getIntervalReferenceSegment(referenceFilename, header, refInterval, intervalRefSeq);
+}
+
+
+
+bool
+isRefRegionOverlap(
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const SVCandidate& sv)
+{
+    if (sv.bp1.interval.tid != sv.bp2.interval.tid) return false; // breakends on different chromosomes never overlap
+
+    GenomeInterval bp1RefInterval; // each breakend interval after padding and chromosome clipping
+    GenomeInterval bp2RefInterval;
+    getBpReferenceInterval(header, extraRefEdgeSize, sv.bp1.interval,bp1RefInterval);
+    getBpReferenceInterval(header, extraRefEdgeSize, sv.bp2.interval,bp2RefInterval);
+
+    return (bp1RefInterval.isIntersect(bp2RefInterval));
+}
+
+
+
+bool
+isRefRegionValid(
+    const bam_header_info& header,
+    const GenomeInterval& bpInterval)
+{
+    const bam_header_info::chrom_info& chromInfo(header.chrom_data[bpInterval.tid]);
+    const pos_t chromSize(static_cast<pos_t>(chromInfo.length));
+
+    assert(bpInterval.range.begin_pos() <= bpInterval.range.end_pos());
+    return (! ((bpInterval.range.begin_pos() >= chromSize) || (bpInterval.range.end_pos() <= 0))); // negation of the no-overlap test that makes getBpReferenceInterval throw
+}
+
+
+
+void
+getSVReferenceSegments(
+    const std::string& referenceFilename,
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const SVCandidate& sv,
+    reference_contig_segment& bp1ref,
+    reference_contig_segment& bp2ref,
+    unsigned& bp1LeadingTrim,
+    unsigned& bp1TrailingTrim,
+    unsigned& bp2LeadingTrim,
+    unsigned& bp2TrailingTrim)
+{
+    getIntervalReferenceSegment(referenceFilename, header, extraRefEdgeSize, sv.bp1.interval, bp1ref, bp1LeadingTrim, bp1TrailingTrim); // breakend 1, handled independently of breakend 2
+    getIntervalReferenceSegment(referenceFilename, header, extraRefEdgeSize, sv.bp2.interval, bp2ref, bp2LeadingTrim, bp2TrailingTrim); // breakend 2
+}
diff --git a/src/c++/lib/manta/SVReferenceUtil.hh b/src/c++/lib/manta/SVReferenceUtil.hh
new file mode 100644
index 0000000..59522ec
--- /dev/null
+++ b/src/c++/lib/manta/SVReferenceUtil.hh
@@ -0,0 +1,108 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/reference_contig_segment.hh"
+#include "htsapi/bam_header_info.hh"
+#include "manta/SVCandidate.hh"
+
+#include <string>
+
+
+/// test whether the two svCandidate breakend regions will overlap
+/// after the extra reference padding has been added
+bool
+isRefRegionOverlap(
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const SVCandidate& sv);
+
+
+/// test whether the interval intersects a reference contig at all
+///
+bool
+isRefRegionValid(
+    const bam_header_info& header,
+    const GenomeInterval& interval);
+
+
+/// given a genome interval, attempt to add an extra buffer
+/// to the interval and return the reference sequence corresponding
+/// to this interval
+///
+/// \param[in] extraRefEdgeSize add this value to the ends of each
+///             interval prior to chromosome length clipping and reference
+///             retrieval
+/// \param[out] leadingTrim indicates how much was cut from the
+///              front of the requested interval (with edge buffer)
+/// \param[out] trailingTrim indicates how much was cut from the
+///              end of the requested interval (with edge buffer)
+///
+void
+getIntervalReferenceSegment(
+    const std::string& referenceFilename,
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const GenomeInterval& interval,
+    reference_contig_segment& intervalRef,
+    unsigned& leadingTrim,
+    unsigned& trailingTrim);
+
+
+/// alternate interface to getIntervalReferenceSegment for applications
+/// where the returned trim value is not needed:
+inline
+void
+getIntervalReferenceSegment(
+    const std::string& referenceFilename,
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const GenomeInterval& interval,
+    reference_contig_segment& intervalRef)
+{
+    unsigned leadingTrim; // written by the full overload below, then discarded
+    unsigned trailingTrim;
+    getIntervalReferenceSegment(referenceFilename, header, extraRefEdgeSize, interval, intervalRef,leadingTrim, trailingTrim);
+}
+
+
+/// extract the reference sequence around each breakend into a reference_contig_segment
+/// object
+///
+/// for each region, we extract the hypothetical breakend region + extraRefEdgeSize bases
+/// on each side, but limit the region to [0,chrom_size-1]
+///
+void
+getSVReferenceSegments(
+    const std::string& referenceFilename,
+    const bam_header_info& header,
+    const pos_t extraRefEdgeSize,
+    const SVCandidate& sv,
+    reference_contig_segment& bp1ref,
+    reference_contig_segment& bp2ref,
+    unsigned& bp1LeadingTrim,
+    unsigned& bp1TrailingTrim,
+    unsigned& bp2LeadingTrim,
+    unsigned& bp2TrailingTrim);
diff --git a/src/c++/lib/manta/SVScoreInfo.cpp b/src/c++/lib/manta/SVScoreInfo.cpp
new file mode 100644
index 0000000..1140b7c
--- /dev/null
+++ b/src/c++/lib/manta/SVScoreInfo.cpp
@@ -0,0 +1,268 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#include "manta/SVScoreInfo.hh"
+#include "blt_util/log.hh"
+#include "blt_util/seq_util.hh"
+#include "blt_util/seq_printer.hh"
+
+#include <iostream>
+
+
+
+SVAlignmentInfo::
+SVAlignmentInfo(
+    const SVCandidate& sv,
+    const SVCandidateAssemblyData& assemblyData) :
+    _isSpanning(assemblyData.isSpanning),
+    _bp1ContigReversed(assemblyData.bporient.isBp1Reversed),
+    _bp2ContigReversed(assemblyData.bporient.isBp2Reversed)
+{
+    // for imprecise SVs, split-read evidence won't be assigned
+    if (sv.isImprecise()) return;
+
+    const pos_t bp1HomLength(sv.bp1.interval.range.size()-1);
+    const pos_t bp2HomLength(sv.bp2.interval.range.size()-1);
+    assert(bp1HomLength >= 0);
+    assert(bp2HomLength >= 0);
+
+    contigSeq = assemblyData.extendedContigs[sv.assemblyAlignIndex];
+
+    if (_isSpanning)
+    {
+        const JumpAlignmentResult<int>& alignment(assemblyData.spanningAlignments[sv.assemblyAlignIndex]);
+
+        // get offsets of breakpoints in the extended contig
+        const pos_t align1Size(apath_read_length(alignment.align1.apath));
+
+        // the beginPos of align1 is the length of reference padding in the extended contig
+        // |ref padding| + |align1| + |insert| + |align2|
+        // both bp1 and bp2 include the insert and micro-homology,
+        // which can avoid false split-read evidence from normal sample when the microhomology is long
+
+        // csaunders: leave homology size out until we're prepared downstream to
+        // deal with each breakpoint as a range instead of a point value
+
+        const pos_t bp1ContigBeginPos(alignment.align1.beginPos + align1Size -1);
+        const pos_t bp2ContigBeginPos(bp1ContigBeginPos + alignment.jumpInsertSize);
+
+        bp1ContigOffset.set_begin_pos(bp1ContigBeginPos);
+        bp2ContigOffset.set_begin_pos(bp2ContigBeginPos);
+
+        // before swap, bp[1/2]ContigOffset are in alignment order (like alignment.align[12])
+        // after swap, they are in the same bp order indicated on the sv (like sv.bp[12])
+        if (assemblyData.bporient.isBp2AlignedFirst)
+        {
+            std::swap(bp1ContigOffset, bp2ContigOffset);
+        }
+
+        /// add micro-homology after swapping so that we can extract information from sv
+        bp1ContigOffset.set_end_pos(bp1ContigOffset.begin_pos() + bp1HomLength);
+        bp2ContigOffset.set_end_pos(bp2ContigOffset.begin_pos() + bp2HomLength);
+
+        if (_bp1ContigReversed || _bp2ContigReversed)
+        {
+            assert(! (_bp1ContigReversed && _bp2ContigReversed));
+
+            revContigSeq = reverseCompCopyStr(contigSeq);
+            // reset offset w.r.t. the reversed contig
+            // note this is -2 and not -1 because we're jumping to the other "side" of the breakend:
+            const pos_t revSize(contigSeq.size()-2);
+            if (_bp1ContigReversed)
+            {
+                const known_pos_range2 tmpRange(bp1ContigOffset);
+                bp1ContigOffset.set_begin_pos(revSize - tmpRange.end_pos());
+                bp1ContigOffset.set_end_pos(revSize - tmpRange.begin_pos());
+            }
+            else
+            {
+                const known_pos_range2 tmpRange(bp2ContigOffset);
+                bp2ContigOffset.set_begin_pos(revSize - tmpRange.end_pos());
+                bp2ContigOffset.set_end_pos(revSize - tmpRange.begin_pos());
+            }
+        }
+        assert(bp1ContigOffset.begin_pos() <= bp1ContigOffset.end_pos());
+        assert(bp2ContigOffset.begin_pos() <= bp2ContigOffset.end_pos());
+
+        // get reference regions
+        const reference_contig_segment& bp1Ref = assemblyData.bp1ref;
+        const reference_contig_segment& bp2Ref = assemblyData.bp2ref;
+        bp1RefSeq = bp1Ref.seq();
+        bp2RefSeq = bp2Ref.seq();
+        // get offsets of breakpoints in the reference regions
+        // again, both bp1 and bp2 include the micro-homology
+
+        // csaunders: see above regarding micro-homology handling
+
+        bp1RefOffset.set_begin_pos(sv.bp1.interval.range.begin_pos() - bp1Ref.get_offset());
+        bp1RefOffset.set_end_pos(bp1RefOffset.begin_pos() + bp1HomLength);
+
+        bp2RefOffset.set_begin_pos(sv.bp2.interval.range.begin_pos() - bp2Ref.get_offset());
+        bp2RefOffset.set_end_pos(bp2RefOffset.begin_pos() + bp2HomLength);
+    }
+    else
+    {
+        // get offsets of breakpoints in the extended contig
+        const AlignmentResult<int>& alignment(assemblyData.smallSVAlignments[sv.assemblyAlignIndex]);
+        const std::pair<unsigned, unsigned>& alignSegment(assemblyData.smallSVSegments[sv.assemblyAlignIndex][sv.assemblySegmentIndex]);
+
+        const ALIGNPATH::path_t apathTillSvStart(&alignment.align.apath[0], &alignment.align.apath[alignSegment.first]);
+        const ALIGNPATH::path_t apathTillSvEnd(&alignment.align.apath[0], &alignment.align.apath[alignSegment.second+1]);
+
+        // the beginPos of align is the length of reference padding in the extended contig
+        // |ref padding| + |alignment segments|
+        // both bp1 and bp2 include the insert and micro-homology,
+        // which can avoid false split-read evidence from normal sample when the microhomology is long
+
+        bp1ContigOffset.set_begin_pos(alignment.align.beginPos + apath_read_length(apathTillSvStart) - 1);
+        bp1ContigOffset.set_end_pos(bp1ContigOffset.begin_pos() + bp1HomLength);
+        bp2ContigOffset.set_begin_pos(alignment.align.beginPos + apath_read_length(apathTillSvEnd) - 1);
+        bp2ContigOffset.set_end_pos(bp2ContigOffset.begin_pos() + bp2HomLength);
+
+        // get reference regions
+        // only bp1ref is used for small events
+        const reference_contig_segment& bp1Ref = assemblyData.bp1ref;
+        bp1RefSeq = bp1Ref.seq();
+
+        // get offsets of breakpoints in the reference regions
+        // again, both bp1 and bp2 include the micro-homology
+        bp1RefOffset.set_range(
+            sv.bp1.interval.range.begin_pos() - bp1Ref.get_offset(),
+            sv.bp1.interval.range.end_pos() -bp1Ref.get_offset());
+        bp2RefOffset.set_range(
+            sv.bp2.interval.range.begin_pos() - bp1Ref.get_offset(),
+            sv.bp2.interval.range.end_pos() -bp1Ref.get_offset());
+    }
+}
+
+
+/// \return true if every breakend offset range has at least minEdge bases on both sides in its contig/reference sequence
+bool
+SVAlignmentInfo::
+isMinBpEdge(
+    const unsigned minEdge) const
+{
+    const int iminEdge(minEdge);
+    if ((bp1ContigOffset.begin_pos()+1) < iminEdge) return false;
+    if ((bp2ContigOffset.begin_pos()+1) < iminEdge) return false;
+    if ((bp1RefOffset.begin_pos()+1) < iminEdge) return false;
+    if ((bp2RefOffset.begin_pos()+1) < iminEdge) return false;
+    // the tests above count bases from index 0 through begin_pos(); the tests below count bases after end_pos()
+    const pos_t contigBpSize(contigSeq.size()-1);
+    if ((contigBpSize - bp1ContigOffset.end_pos()) < iminEdge) return false;
+    if ((contigBpSize - bp2ContigOffset.end_pos()) < iminEdge) return false;
+    // same trailing-edge test against the bp1 reference sequence
+    const pos_t bp1RefSize(bp1ReferenceSeq().size());
+    if ((bp1RefSize - 1 - bp1RefOffset.end_pos()) < iminEdge) return false;
+    // bp2ReferenceSeq() returns the bp1 reference sequence when the SV is not spanning
+    const pos_t bp2RefSize(bp2ReferenceSeq().size());
+    if ((bp2RefSize - 1 - bp2RefOffset.end_pos()) < iminEdge) return false;
+
+    return true;
+}
+
+
+
+static
+void
+dumpSeq(
+    const char* label,
+    const std::string& seq,
+    std::ostream& os)
+{
+    os << label << " size/seq: " << seq.size() << '\n';
+    printSeq(seq,os);
+    os << '\n';
+}
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVAlignmentInfo& ai)
+{
+    os << "SVAlignmentInfo: isSpanning: " << ai.isSpanning() << '\n';
+    dumpSeq("Contig",ai.contigSeq,os);
+    dumpSeq("Rev Contig",ai.revContigSeq,os);
+    os << "bp1 contig offset = " << ai.bp1ContigOffset << " bp1 contig reversed = " << ai._bp1ContigReversed << '\n';
+    os << "bp2 contig offset = " << ai.bp2ContigOffset << " bp2 contig reversed = " << ai._bp2ContigReversed << '\n';
+    dumpSeq("bp1RefSeq",ai.bp1RefSeq,os);
+    if (ai.isSpanning())
+    {
+        dumpSeq("bp2RefSeq",ai.bp2RefSeq,os);
+    }
+    os << "bp1 reference offset = " << ai.bp1RefOffset << '\n';
+    os << "bp2 reference offset = " << ai.bp2RefOffset << '\n';
+    return os;
+}
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVSampleAlleleInfo& sai)
+{
+    static const char indent('\t');
+    os << "SVSampleAlleleInfo:\n"
+       << indent << "confidentSpanningPairCount: " << sai.confidentSpanningPairCount << '\n'
+       << indent << "confidentSemiMappedSpanningPairCount: " << sai.confidentSemiMappedSpanningPairCount << '\n'
+       << indent << "splitReadCount: " << sai.splitReadCount << '\n'
+       << indent << "confidentSplitReadCount: " << sai.confidentSplitReadCount << '\n'
+       ;
+    return os;
+}
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVSampleInfo& si)
+{
+    os << "SVSampleInfo:\n"
+       << "Alt Allele " << si.alt
+       << "Ref Allele " << si.ref
+       ;
+    return os;
+}
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVScoreInfo& ssi)
+{
+    os << "SVScoreInfo bp1MaxDepth=" << ssi.bp1MaxDepth << " bp2MaxDepth=" << ssi.bp2MaxDepth << '\n'
+       << "SVScoreInfo bp1MQ0Frac=" << ssi.bp1MQ0Frac << " bp2MQ0Frac=" << ssi.bp2MQ0Frac << '\n';
+
+    for (const auto& sample : ssi.samples)
+    {
+        os << "Sample info " << sample;
+    }
+    return os;
+}
+
diff --git a/src/c++/lib/manta/SVScoreInfo.hh b/src/c++/lib/manta/SVScoreInfo.hh
new file mode 100644
index 0000000..762d7b6
--- /dev/null
+++ b/src/c++/lib/manta/SVScoreInfo.hh
@@ -0,0 +1,197 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include "manta/SVCandidateAssemblyData.hh"
+#include "manta/SVCandidate.hh"
+
+#include <iosfwd>
+#include <string>
+
+
+struct SVAlignmentInfo
+{
+    SVAlignmentInfo(
+        const SVCandidate& sv,
+        const SVCandidateAssemblyData& assemblyData);
+
+    const std::string&
+    bp1ContigSeq() const
+    {
+        return (_bp1ContigReversed ? revContigSeq : contigSeq);
+    }
+
+    const std::string&
+    bp2ContigSeq() const
+    {
+        return (_bp2ContigReversed ? revContigSeq : contigSeq);
+    }
+
+    const std::string&
+    bp1ReferenceSeq() const
+    {
+        return bp1RefSeq;
+    }
+
+    const std::string&
+    bp2ReferenceSeq() const
+    {
+        return (_isSpanning ? bp2RefSeq : bp1RefSeq);
+    }
+
+    bool
+    isSpanning() const
+    {
+        return _isSpanning;
+    }
+
+    /// do we have enough room on either side of the breakend for
+    /// both the ref and contig to make a fair split read evaluation?
+    bool
+    isMinBpEdge(const unsigned minEdge) const;
+
+    friend
+    std::ostream&
+    operator<<(std::ostream& os, const SVAlignmentInfo& ai);
+
+private:
+    std::string contigSeq;
+    std::string revContigSeq;
+    std::string bp1RefSeq;
+    std::string bp2RefSeq;
+    const bool _isSpanning;
+    const bool _bp1ContigReversed;
+    const bool _bp2ContigReversed;
+
+public:
+    /// all offset range 'begin' values correspond to the zero-indexed base immediately before the breakend on the fwd-strand,
+    /// and 'end' values correspond to the zero-indexed base immediately before the breakend on the forward strand+microhomology range
+    /// In the absence of microhomology, begin and end should be equal.
+    known_pos_range2 bp1ContigOffset;
+    known_pos_range2 bp2ContigOffset;
+    known_pos_range2 bp1RefOffset;
+    known_pos_range2 bp2RefOffset;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVAlignmentInfo& ai);
+
+
+
+/// sample-specific and allele-specific evidence info
+struct SVSampleAlleleInfo
+{
+    void
+    clear()
+    {
+        spanningPairCount = 0;
+        confidentSpanningPairCount = 0;
+        confidentSemiMappedSpanningPairCount = 0;
+        splitReadCount = 0;
+        splitReadEvidence = 0;
+        splitReadMapQ = 0;
+        confidentSplitReadCount = 0;
+        confidentSplitReadAndPairCountRefBp1 = 0;
+        confidentSplitReadAndPairCountRefBp2 = 0;
+    }
+
+    // allele pair support
+    unsigned spanningPairCount = 0;  ///< all mapped pairs compatible with the allele
+    unsigned confidentSpanningPairCount = 0;  ///< pairs where both reads are mapped and we've successfully looked up a fragment prob of 0.01 or more
+    unsigned confidentSemiMappedSpanningPairCount = 0; ///< pairs where at least one read is mapped and we've successfully looked up a fragment prob of 0.01 or more
+
+    // allele split support
+    unsigned splitReadCount = 0;
+    float splitReadEvidence = 0;
+    float splitReadMapQ = 0;
+
+    unsigned confidentSplitReadCount = 0; ///< count by comparing alignment quality vs the other allele
+
+    unsigned confidentSplitReadAndPairCountRefBp1 = 0; ///< For 'ref' alleles, the support by split reads and spanning pairs at bp1
+    unsigned confidentSplitReadAndPairCountRefBp2 = 0; ///< For 'ref' alleles, the support at bp2
+
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVSampleAlleleInfo& si);
+
+
+
+/// sample-specific evidence info
+struct SVSampleInfo
+{
+    void
+    clear()
+    {
+        alt.clear();
+        ref.clear();
+    }
+
+    SVSampleAlleleInfo alt;
+    SVSampleAlleleInfo ref;
+};
+
+std::ostream&
+operator<<(std::ostream& os, const SVSampleInfo& si);
+
+
+/// consolidate model-agnostic scoring results applied to an SV candidate
+struct SVScoreInfo
+{
+    void
+    setSampleCount(
+        const unsigned sampleCount)
+    {
+        samples.resize(sampleCount);
+        clear();
+    }
+
+    void
+    clear()
+    {
+        for (auto& sample : samples)
+        {
+            sample.clear();
+        }
+
+        bp1MaxDepth = 0;
+        bp2MaxDepth = 0;
+
+        bp1MQ0Frac = 0.;
+        bp2MQ0Frac = 0.;
+    }
+
+    std::vector<SVSampleInfo> samples;
+
+    unsigned bp1MaxDepth = 0;
+    unsigned bp2MaxDepth = 0;
+
+    float bp1MQ0Frac = 0;
+    float bp2MQ0Frac = 0;
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVScoreInfo& ssi);
diff --git a/src/c++/lib/manta/SVScoreInfoDiploid.cpp b/src/c++/lib/manta/SVScoreInfoDiploid.cpp
new file mode 100644
index 0000000..08aba83
--- /dev/null
+++ b/src/c++/lib/manta/SVScoreInfoDiploid.cpp
@@ -0,0 +1,75 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/SVScoreInfoDiploid.hh"
+#include "blt_util/log.hh"
+
+#include <iostream>
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVScoreInfoDiploidSample& sids)
+{
+    os << "DiploidSVScoreInfoSample "
+       << " gt=" << DIPLOID_GT::label(sids.gt)
+       << " gtScore=" << sids.gtScore
+       << " pl=";
+
+    for (unsigned gt(0); gt<DIPLOID_GT::SIZE; ++gt)
+    {
+        if (gt!=0) os << ',';
+        os << sids.phredLoghood[gt];
+    }
+    os << " sampleFilters:";
+    for (const std::string& filter : sids.filters)
+    {
+        os << " " << filter;
+    }
+    return os;
+}
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVScoreInfoDiploid& sid)
+{
+    os << "DiploidSVScoreInfo "
+       << " altScore=" << sid.altScore;
+    os << " filters:";
+    for (const std::string& filter : sid.filters)
+    {
+        os << " " << filter;
+    }
+    os << "\n";
+    for (const auto& sample : sid.samples)
+    {
+        os << sample << "\n";
+    }
+    return os;
+}
diff --git a/src/c++/lib/manta/SVScoreInfoDiploid.hh b/src/c++/lib/manta/SVScoreInfoDiploid.hh
new file mode 100644
index 0000000..d39f281
--- /dev/null
+++ b/src/c++/lib/manta/SVScoreInfoDiploid.hh
@@ -0,0 +1,185 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include <iosfwd>
+#include <set>
+#include <string>
+#include <vector>
+
+
+namespace DIPLOID_GT
+{
+enum index_t
+{
+    REF,
+    HET,
+    HOM,
+    SIZE
+};
+
+inline
+const char*
+label(const index_t i)
+{
+    switch (i)
+    {
+    case REF :
+        return "ref";
+    case HET :
+        return "het";
+    case HOM :
+        return "hom";
+    default:
+        assert(false && "Unknown GT state");
+        return NULL;
+    }
+}
+
+inline
+const char*
+label(const unsigned i)
+{
+    return label(static_cast<index_t>(i));
+}
+
+inline
+float
+altFraction(const index_t i)
+{
+    switch (i)
+    {
+    case REF :
+        return 0;
+    case HET :
+        return 0.5;
+    case HOM :
+        return 1.0;
+    default:
+        assert(false && "Unknown GT state");
+        return 0;
+    }
+}
+
+inline
+double
+altLnFraction(const index_t i)
+{
+    static const double val[] = { std::log(0.), std::log(0.5), std::log(1.) };
+    switch (i)
+    {
+    case REF :
+        return val[0];
+    case HET :
+        return val[1];
+    case HOM :
+        return val[2];
+    default:
+        assert(false && "Unknown GT state");
+        return 0;
+    }
+}
+
+inline
+double
+altLnCompFraction(const index_t i)
+{
+    return altLnFraction(static_cast<index_t>(2-i));
+}
+
+}
+
+
+
+struct SVScoreInfoDiploidSample
+{
+    SVScoreInfoDiploidSample()
+        : phredLoghood(DIPLOID_GT::SIZE,0)
+    {}
+
+    void
+    clear()
+    {
+        filters.clear();
+        gt=DIPLOID_GT::REF;
+        gtScore=0;
+        std::fill(phredLoghood.begin(),phredLoghood.end(),0);
+    }
+
+    std::set<std::string> filters;
+
+    DIPLOID_GT::index_t gt = DIPLOID_GT::REF;
+
+    unsigned gtScore = 0; ///< quality score of genotype
+
+    std::vector<unsigned> phredLoghood;
+};
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVScoreInfoDiploidSample& sid);
+
+
+
+/// consolidate all germline scoring results applied to an SV candidate
+struct SVScoreInfoDiploid
+{
+    void
+    setSampleCount(
+        const unsigned sampleCount)
+    {
+        samples.resize(sampleCount);
+    }
+
+    void
+    clear()
+    {
+        filters.clear();
+        altScore=0;
+        for (auto& sample : samples)
+        {
+            sample.clear();
+        }
+    }
+
+    std::set<std::string> filters;
+
+    unsigned altScore = 0; ///< quality score indicating any non-reference state (regardless of specific genotype)
+
+    std::vector<SVScoreInfoDiploidSample> samples;
+};
+
+
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVScoreInfoDiploid& sid);
diff --git a/src/c++/lib/manta/SVScoreInfoSomatic.cpp b/src/c++/lib/manta/SVScoreInfoSomatic.cpp
new file mode 100644
index 0000000..a25c794
--- /dev/null
+++ b/src/c++/lib/manta/SVScoreInfoSomatic.cpp
@@ -0,0 +1,43 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "manta/SVScoreInfoSomatic.hh"
+#include "blt_util/log.hh"
+
+#include <iostream>
+
+
+
+/// debug-print the somatic score, score tier and filter labels on one line (no trailing newline)
+std::ostream&
+operator<<(std::ostream& os, const SVScoreInfoSomatic& sis)
+{
+    // somaticScoreTier is an unsigned char: cast it so that the tier is streamed as a number, not as a raw character
+    os << "SomaticSVScoreInfo somaticScore: " << sis.somaticScore << " sstier: " << static_cast<unsigned>(sis.somaticScoreTier) << " filters: ";
+    for (const std::string& filter : sis.filters)
+    {
+        os << " " << filter;
+    }
+    return os;
+}
diff --git a/src/c++/lib/manta/SVScoreInfoSomatic.hh b/src/c++/lib/manta/SVScoreInfoSomatic.hh
new file mode 100644
index 0000000..13cec0c
--- /dev/null
+++ b/src/c++/lib/manta/SVScoreInfoSomatic.hh
@@ -0,0 +1,181 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders and Xiaoyu Chen
+///
+
+#pragma once
+
+#include <cassert>
+#include <cstdlib>
+
+#include <iosfwd>
+#include <set>
+#include <string>
+#include <cmath>
+
+
+namespace SOMATIC_GT
+{
+// TODO: estimated from tumor
+const double SOMATIC_MUTATION_FREQ = 0.6;
+
+enum index_t
+{
+    REF,
+    HET,
+    HOM,
+    SOM,
+    NOISE,
+    SIZE
+};
+
+inline
+const char*
+label(const index_t i)
+{
+    switch (i)
+    {
+    case REF :
+        return "ref";
+    case HET :
+        return "het";
+    case HOM :
+        return "hom";
+    case SOM :
+        return "som";
+    case NOISE :
+        return "noise";
+    default:
+        assert(false && "Unknown GT state");
+        return NULL;
+    }
+}
+
+inline
+const char*
+label(const unsigned i)
+{
+    return label(static_cast<index_t>(i));
+}
+
+inline
+float
+altFraction(
+    const index_t i,
+    const float somaticFreq,
+    const float noiseFreq)
+{
+    switch (i)
+    {
+    case REF :
+        return 0;
+    case HET :
+        return 0.5;
+    case HOM :
+        return 1.0;
+    case SOM:
+        return somaticFreq;
+    case NOISE:
+        return noiseFreq;
+    default:
+        assert(false && "Unknown GT state");
+        return 0;
+    }
+}
+
+inline
+double
+altLnFraction(
+    const index_t i,
+    const double somaticFreq,
+    const double noiseFreq)
+{
+    static const double val[] = { std::log(0.), std::log(0.5), std::log(1.) };
+
+    switch (i)
+    {
+    case REF :
+        return val[0];
+    case HET :
+        return val[1];
+    case HOM :
+        return val[2];
+    case SOM:
+        return std::log(somaticFreq);
+    case NOISE:
+        return std::log(noiseFreq);
+    default:
+        assert(false && "Unknown GT state");
+        return 0;
+    }
+}
+
+inline
+double
+altLnCompFraction(
+    const index_t i,
+    const double somaticFreq,
+    const double noiseFreq)
+{
+    static const double val[] = { std::log(1.), std::log(0.5), std::log(0.) };
+
+    switch (i)
+    {
+    case REF :
+        return val[0];
+    case HET :
+        return val[1];
+    case HOM :
+        return val[2];
+    case SOM:
+        return std::log(1-somaticFreq);
+    case NOISE:
+        return std::log(1-noiseFreq);
+    default:
+        assert(false && "Unknown GT state");
+        return 0;
+    }
+}
+}
+
+
+/// consolidate all somatic scoring results applied to an SV candidate
+struct SVScoreInfoSomatic
+{
+    void
+    clear()
+    {
+        filters.clear();
+        somaticScore=0;
+        somaticScoreTier=0;
+    }
+
+    std::set<std::string> filters;
+
+    unsigned somaticScore = 0;
+    unsigned char somaticScoreTier = 0;
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVScoreInfoSomatic& sis);
+
diff --git a/src/c++/lib/manta/SVScoreInfoTumor.hh b/src/c++/lib/manta/SVScoreInfoTumor.hh
new file mode 100644
index 0000000..425b175
--- /dev/null
+++ b/src/c++/lib/manta/SVScoreInfoTumor.hh
@@ -0,0 +1,142 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#pragma once
+
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include <iosfwd>
+#include <set>
+#include <string>
+
+
+namespace TUMOR_GT
+{
+enum index_t
+{
+    REF,
+    HET,
+    HOM,
+    SIZE
+};
+
+inline
+const char*
+label(const index_t i)
+{
+    switch (i)
+    {
+    case REF :
+        return "ref";
+    case HET :
+        return "het";
+    case HOM :
+        return "hom";
+    default:
+        assert(false && "Unknown GT state");
+        return NULL;
+    }
+}
+
+inline
+const char*
+label(const unsigned i)
+{
+    return label(static_cast<index_t>(i));
+}
+
+inline
+float
+altFraction(const index_t i)
+{
+    switch (i)
+    {
+    case REF :
+        return 0;
+    case HET :
+        //TODO: fix this prior for low-AF
+        return 0.5;
+    case HOM :
+        return 1.0;
+    default:
+        assert(false && "Unknown GT state");
+        return 0;
+    }
+}
+
+inline
+double
+altLnFraction(const index_t i)
+{
+    //TODO: fix value of HET for low-AF
+    static const double val[] = { std::log(0.), std::log(0.5), std::log(1.) };
+    switch (i)
+    {
+    case REF :
+        return val[0];
+    case HET :
+        return val[1];
+    case HOM :
+        return val[2];
+    default:
+        assert(false && "Unknown GT state");
+        return 0;
+    }
+}
+
+inline
+double
+altLnCompFraction(const index_t i)
+{
+    return altLnFraction(static_cast<index_t>(2-i));
+}
+
+}
+
+/// consolidate all tumor-only scoring results applied to an SV candidate
+struct SVScoreInfoTumor
+{
+    void
+    clear()
+    {
+        filters.clear();
+        gt=TUMOR_GT::REF;
+        altScore=0;
+        gtScore=0;
+    }
+
+    std::set<std::string> filters;
+
+    TUMOR_GT::index_t gt = TUMOR_GT::REF;
+
+    unsigned altScore = 0; ///< quality score indicating any non-reference state (regardless of specific genotype)
+    unsigned gtScore = 0; ///< quality score of genotype
+};
+
+std::ostream&
+operator<<(
+    std::ostream& os,
+    const SVScoreInfoTumor& sid);
diff --git a/src/c++/lib/manta/ShadowReadFinder.cpp b/src/c++/lib/manta/ShadowReadFinder.cpp
new file mode 100644
index 0000000..d224b8e
--- /dev/null
+++ b/src/c++/lib/manta/ShadowReadFinder.cpp
@@ -0,0 +1,120 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#include "ShadowReadFinder.hh"
+
+#include "htsapi/bam_record_util.hh"
+
+
+#ifdef DEBUG_IS_SHADOW
+#include <iostream>
+#include "blt_util/log.hh"
+#endif
+
+
+/// test whether bamRead qualifies as the shadow (unmapped mate) of the anchor read named lastQname
+/// \return true only if bamRead is paired, is not rejected by isNonStrictSupplement(), is unmapped
+///         with a mapped mate, has an average quality of at least 25 and has the qname lastQname
+static
+bool
+isGoodShadow(
+    const bam_record& bamRead,
+    const std::string& lastQname)
+{
+#ifdef DEBUG_IS_SHADOW
+    static const std::string logtag("isGoodShadow");
+#endif
+
+    if (! bamRead.is_paired()) return false;
+    if (bamRead.isNonStrictSupplement()) return false;
+
+    // sanity check that this is a shadow read:
+    if (!bamRead.is_unmapped()) return false;
+    if (bamRead.is_mate_unmapped()) return false;
+
+    static const unsigned minAvgQualShadow = 25;
+    if (get_avg_quality(bamRead) < minAvgQualShadow) return false;
+
+    if (strcmp(bamRead.qname(),lastQname.c_str()) != 0)
+    {
+        // something went wrong here, shadows should have their singleton partner
+        // preceding them in the BAM file.
+#ifdef DEBUG_IS_SHADOW
+        log_os << logtag << " ERROR: Shadow without matching singleton : " << bamRead.qname() << " vs " << lastQname << std::endl;
+#endif
+        return false;
+    }
+
+#ifdef DEBUG_IS_SHADOW
+    // single chained insertion, terminated only on its last line
+    log_os << logtag << " Found shadow!\n"
+           << logtag << " this mapq  = " << ((unsigned int)bamRead.map_qual()) << std::endl
+           << logtag << " last qname = " << lastQname << std::endl;
+#endif
+
+    return true;
+}
+
+
+/// test whether bamRead can anchor a shadow: a paired, mapped read with an unmapped mate,
+/// on a strand allowed by the search flags, with a mapq that is not below _minMapq
+bool
+ShadowReadFinder::
+isShadowAnchor(
+    const bam_record& bamRead,
+    const bool isSearchForLeftOpen,
+    const bool isSearchForRightOpen) const
+{
+    const bool isMappedWithUnmappedMate(bamRead.is_paired() && (! bamRead.is_unmapped()) && bamRead.is_mate_unmapped());
+    if (! isMappedWithUnmappedMate) return false;
+
+    // rev-strand reads require the left-open search, fwd-strand reads require the right-open search
+    const bool isStrandAllowed(bamRead.is_fwd_strand() ? isSearchForRightOpen : isSearchForLeftOpen);
+    if (! isStrandAllowed) return false;
+    return (! (bamRead.map_qual() < _minMapq));
+}
+
+
+/// record the mapq and qname of bamRead as the current anchor and mark the anchor as set (read back by isShadow())
+void
+ShadowReadFinder::
+setAnchor(
+    const bam_record& bamRead)
+{
+    _lastMapq  = bamRead.map_qual();
+    _lastQname = bamRead.qname();
+    _isLastSet = true;
+}
+
+
+/// \return false if no anchor is set; otherwise clear the anchor flag and return isGoodShadow() for bamRead vs. the stored qname
+bool
+ShadowReadFinder::
+isShadow(
+    const bam_record& bamRead)
+{
+    if (! _isLastSet) return false;
+    _isLastSet = false;
+    return isGoodShadow(bamRead, _lastQname);
+}
diff --git a/src/c++/lib/manta/ShadowReadFinder.hh b/src/c++/lib/manta/ShadowReadFinder.hh
new file mode 100644
index 0000000..d8da8ef
--- /dev/null
+++ b/src/c++/lib/manta/ShadowReadFinder.hh
@@ -0,0 +1,128 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+#include "htsapi/bam_record.hh"
+
+#include <string>
+
+
+/// encapsulates the logic of checking for shadow reads assuming that they've been placed
+/// consecutively after their mapped mate read
+///
+/// basic usage:
+///
+/// ShadowReadFinder checker(minMapq)
+/// for bam_region in regions :
+///     for bam_record in bam_region :
+///         checker.check(bam_record)
+///     checker.reset()
+/// (reset() discards any pending anchor, so an anchor is never carried over into the next region)
+struct ShadowReadFinder
+{
+    ShadowReadFinder(
+        const unsigned minMapq, ///< anchor reads with a mapq below this value are not accepted
+        const bool isSearchForLeftOpen = true, ///< default left-open flag used by the one argument isShadowAnchor()
+        const bool isSearchForRightOpen = true) : ///< default right-open flag used by the one argument isShadowAnchor()
+        _minMapq(minMapq),
+        _isLeftDefault(isSearchForLeftOpen),
+        _isRightDefault(isSearchForRightOpen),
+        _isLastSet(false),
+        _lastMapq(0)
+    {}
+
+    /// reset mate tracking info (any stored anchor is discarded)
+    void
+    reset()
+    {
+        _isLastSet=false;
+    }
+
+    /// all in one single method interface to shadow finder
+    ///
+    /// if this is called only once for each read it will return
+    /// true for any unmapped shadow read, assuming the common convention
+    /// that unmapped shadows follow their anchor.
+    ///
+    bool
+    check(const bam_record& bamRead)
+    {
+        if (isShadow(bamRead)) return true; // this test also clears any stored anchor
+        if (isShadowAnchor(bamRead)) setAnchor(bamRead); // remember the anchor for the next call
+        return false;
+    }
+
+    /// mapq of the most recently stored anchor read -- only valid after check() is true
+    unsigned
+    getMateMapq() const
+    {
+        return _lastMapq;
+    }
+
+    bool
+    isShadowMate() const
+    {
+        return _isLastSet; // true while an anchor is stored and has not yet been tested against a following read
+    }
+
+    /// the following methods are subcomponents of the check() system above --
+    /// you probably only want to use one or the other
+
+    /// check for shadow anchor status
+    ///
+    /// uses default left-open, right-open values
+    bool
+    isShadowAnchor(
+        const bam_record& bamRead) const
+    {
+        return isShadowAnchor(bamRead,_isLeftDefault,_isRightDefault);
+    }
+
+    /// check for shadow anchor status
+    /// using the supplied left-open, right-open values instead of the defaults
+    bool
+    isShadowAnchor(
+        const bam_record& bamRead,
+        const bool isSearchForLeftOpen,
+        const bool isSearchForRightOpen) const;
+
+    void
+    setAnchor(
+        const bam_record& bamRead); // stores bamRead's qname and mapq as the current anchor
+
+    bool
+    isShadow(
+        const bam_record& bamRead); // tests bamRead against the stored anchor; the anchor is cleared either way
+
+
+private:
+
+    const unsigned _minMapq; ///< minimum mapq required of an anchor read
+    const bool _isLeftDefault; ///< default isSearchForLeftOpen value
+    const bool _isRightDefault; ///< default isSearchForRightOpen value
+    bool _isLastSet; ///< true while an anchor is stored
+    uint8_t _lastMapq; ///< mapq of the stored anchor
+    std::string _lastQname; ///< qname of the stored anchor
+};
diff --git a/src/c++/lib/manta/test/CMakeLists.txt b/src/c++/lib/manta/test/CMakeLists.txt
new file mode 100644
index 0000000..8164e48
--- /dev/null
+++ b/src/c++/lib/manta/test/CMakeLists.txt
@@ -0,0 +1,29 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+set(ADDITIONAL_UNITTEST_LIB manta_svgraph manta_blt_util)
+include(${THIS_CXX_TEST_LIBRARY_CMAKE})
diff --git a/src/c++/lib/manta/test/SVLocusScannerSemiAlignedTest.cpp b/src/c++/lib/manta/test/SVLocusScannerSemiAlignedTest.cpp
new file mode 100644
index 0000000..51ec2b9
--- /dev/null
+++ b/src/c++/lib/manta/test/SVLocusScannerSemiAlignedTest.cpp
@@ -0,0 +1,176 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "htsapi/align_path_bam_util.hh"
+#include "htsapi/SimpleAlignment_bam_util.hh"
+#include "manta/SVLocusScannerSemiAligned.cpp"
+
+#include "boost/scoped_array.hpp"
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocusScannerSemiAligned )
+
+
+/// build an ungapped, all-Q30 read from querySeq at alignPos and run edgeMismatchLength() on it;
+/// the reference sequence is assumed to be a clip starting at the query alignment start position
+static
+void
+semiAlignTestCase(
+    const pos_t alignPos,
+    const char* querySeq,
+    const char* refSeq,
+    unsigned& leadingLength,
+    pos_t& leadingRefPos,
+    unsigned& trailingLength,
+    pos_t& trailingRefPos)
+{
+    leadingLength=0; // all four outputs are zeroed before the call under test fills them in
+    leadingRefPos=0;
+    trailingLength=0;
+    trailingRefPos=0;
+
+    const unsigned querySize(strlen(querySeq));
+
+    ALIGNPATH::path_t inputPath;
+    inputPath.push_back(ALIGNPATH::path_segment(ALIGNPATH::MATCH,querySize)); // one match segment spanning the whole read
+
+    bam_record bamRead;
+    bam1_t* bamDataPtr(bamRead.get_data());
+    edit_bam_cigar(inputPath,*bamDataPtr);
+
+    reference_contig_segment testRefSeg;
+    testRefSeg.seq() = refSeq;
+    testRefSeg.set_offset(alignPos); // the reference clip begins at the alignment start position
+
+    // initialize test qual array to all Q30's:
+    boost::scoped_array<uint8_t> qual(new uint8_t[querySize]);
+    for (unsigned i(0); i<querySize; ++i)
+    {
+        qual[i] = 30;
+    }
+
+    edit_bam_read_and_quality(querySeq, qual.get(), *bamDataPtr);
+
+    SimpleAlignment align(getAlignment(bamRead));
+    align.pos = alignPos; // the alignment start is set here, after the alignment is extracted from the record
+
+    edgeMismatchLength(align, bamRead.get_bam_read(), testRefSeg, 5, // NOTE(review): meaning of the literal 5 is not visible here -- confirm against edgeMismatchLength()
+                       leadingLength, leadingRefPos, trailingLength, trailingRefPos);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_getSVCandidatesFromSemiAligned_null )
+{
+    static const pos_t alignPos(500);
+    static const char querySeq[] = "AACCCACAAACATCACACACAAGAGTCCAGAGACCGACTTTTTTCTAAAA";
+    static const char refSeq[]   = "AACCCACAAACATCACACACAAGAGTCCAGAGACCGACTTTTTTCTAAAA"; // identical to querySeq
+
+    unsigned leadingLength(0), trailingLength(0);
+    pos_t leadingRefPos(0), trailingRefPos(0);
+
+    semiAlignTestCase(alignPos, querySeq,refSeq,leadingLength,leadingRefPos,trailingLength, trailingRefPos);
+
+    BOOST_REQUIRE_EQUAL(leadingLength,0u); // no mismatched edge is expected at either end of the read
+    BOOST_REQUIRE_EQUAL(trailingLength,0u);
+    BOOST_REQUIRE_EQUAL(leadingRefPos,alignPos); // expected edge positions are the alignment start and the position after the 50 base read
+    BOOST_REQUIRE_EQUAL(trailingRefPos,alignPos+50);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_getSVCandidatesFromSemiAligned_leading )
+{
+    static const pos_t alignPos(500);
+    static const char querySeq[] = "AACCCACAAACATCACACACAAGAGTCCAGAGACCGACTTTTTTCTAAAA";
+    static const char refSeq[]   = "AACCTTTTTTCATCACACACAAGAGTCCAGAGACCGACTTTTTTCTAAAA"; // differs from querySeq only at read offsets 4-9
+
+    unsigned leadingLength(0), trailingLength(0);
+    pos_t leadingRefPos(0), trailingRefPos(0);
+
+    semiAlignTestCase(alignPos, querySeq,refSeq,leadingLength,leadingRefPos,trailingLength, trailingRefPos);
+
+    BOOST_REQUIRE_EQUAL(leadingLength,10u); // the expected leading edge covers offsets 0-9, including the 4 matching bases before the first mismatch
+    BOOST_REQUIRE_EQUAL(trailingLength,0u);
+    BOOST_REQUIRE_EQUAL(leadingRefPos,alignPos+10);
+    BOOST_REQUIRE_EQUAL(trailingRefPos,alignPos+50);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_getSVCandidatesFromSemiAligned_trailing )
+{
+    static const pos_t alignPos(500);
+    static const char querySeq[] = "AACCCACAAACATCACACACAAGAGTCCAGAGACCGACTTTTTTCTAAAA";
+    static const char refSeq[]   = "AACCCACAAACATCACACACAAGAGTCCAGAGACCGACTTCCCCCCAAAA"; // differs from querySeq only within read offsets 40-45
+
+    unsigned leadingLength(0), trailingLength(0);
+    pos_t leadingRefPos(0), trailingRefPos(0);
+
+    semiAlignTestCase(alignPos, querySeq,refSeq,leadingLength,leadingRefPos,trailingLength, trailingRefPos);
+
+    BOOST_REQUIRE_EQUAL(leadingLength,0u);
+    BOOST_REQUIRE_EQUAL(trailingLength,10u); // the expected trailing edge covers offsets 40-49, including the 4 matching bases after the last mismatch
+    BOOST_REQUIRE_EQUAL(leadingRefPos,alignPos);
+    BOOST_REQUIRE_EQUAL(trailingRefPos,alignPos+50-10);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_getSVCandidatesFromSemiAligned_both )
+{
+    static const pos_t alignPos(500);
+    static const char querySeq[] = "AACCCACAAACATCACACACAAGAGTCCAGAGACCGACTTTTTTCTAAAA";
+    static const char refSeq[]   = "AACCTTTTTTCATCACACACAAGAGTCCAGAGACCGACTTCCCCCCAAAA"; // differs from querySeq at read offsets 4-9 and within offsets 40-45
+
+    unsigned leadingLength(0), trailingLength(0);
+    pos_t leadingRefPos(0), trailingRefPos(0);
+
+    semiAlignTestCase(alignPos, querySeq,refSeq,leadingLength,leadingRefPos,trailingLength, trailingRefPos);
+
+    BOOST_REQUIRE_EQUAL(leadingLength,10u); // both edges are expected to be reported independently
+    BOOST_REQUIRE_EQUAL(trailingLength,10u);
+    BOOST_REQUIRE_EQUAL(leadingRefPos,alignPos+10);
+    BOOST_REQUIRE_EQUAL(trailingRefPos,alignPos+50-10);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_getSVCandidatesFromSemiAligned_mismatch )
+{
+    static const pos_t alignPos(500);
+    static const char querySeq[] = "AACCCACAAACATCACACACAAGAGTCCAGAGACCGACTTTTTTCTAAAA";
+    static const char refSeq[]   = "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"; // all 'G': only isolated query bases can match
+
+    unsigned leadingLength(0), trailingLength(0);
+    pos_t leadingRefPos(0), trailingRefPos(0);
+
+    semiAlignTestCase(alignPos, querySeq,refSeq,leadingLength,leadingRefPos,trailingLength, trailingRefPos);
+
+    BOOST_REQUIRE_EQUAL(leadingLength,50u); // the whole 50 base read is expected to be reported as both leading and trailing edge
+    BOOST_REQUIRE_EQUAL(trailingLength,50u);
+    BOOST_REQUIRE_EQUAL(leadingRefPos,alignPos+50);
+    BOOST_REQUIRE_EQUAL(trailingRefPos,alignPos+50-50);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/manta/test/SVLocusScannerTest.cpp b/src/c++/lib/manta/test/SVLocusScannerTest.cpp
new file mode 100644
index 0000000..6402f98
--- /dev/null
+++ b/src/c++/lib/manta/test/SVLocusScannerTest.cpp
@@ -0,0 +1,63 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "htsapi/SimpleAlignment_bam_util.hh"
+#include "manta/SVLocusScanner.cpp"
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocusScanner )
+
+
+BOOST_AUTO_TEST_CASE( test_getSVCandidatesFromReadIndels )
+{
+    const bool isRNA(false);
+    const bool isStranded(true);
+    const ReadScannerOptions opt;
+    const ReadScannerDerivOptions dopt(opt,isRNA,isStranded);
+
+    ALIGNPATH::path_t inputPath;
+    cigar_to_apath("100M2000D100M",inputPath); // one 2000 base deletion flanked by 100 base matches
+
+    bam_record bamRead;
+    bam1_t* bamDataPtr(bamRead.get_data());
+    edit_bam_cigar(inputPath,*bamDataPtr);
+
+    SimpleAlignment align(getAlignment(bamRead));
+
+    std::vector<SVObservation> candidates;
+
+    // the deletion is the only indel in the read, so exactly one candidate is expected:
+
+    getSVCandidatesFromReadIndels(opt, dopt, align, FRAGSOURCE::UNKNOWN, candidates);
+
+    BOOST_REQUIRE_EQUAL(candidates.size(),1u);
+    BOOST_REQUIRE(candidates[0].bp1.interval.range.is_pos_intersect(100)); // breakend 1 is expected where the deletion starts
+    BOOST_REQUIRE(candidates[0].bp2.interval.range.is_pos_intersect(2100)); // breakend 2 is expected where the deletion ends
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/manta/test/test_main.cpp b/src/c++/lib/manta/test/test_main.cpp
new file mode 100644
index 0000000..031e8ec
--- /dev/null
+++ b/src/c++/lib/manta/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libmanta
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/c++/lib/options/AlignmentFileOptions.hh b/src/c++/lib/options/AlignmentFileOptions.hh
new file mode 100644
index 0000000..7f1e13a
--- /dev/null
+++ b/src/c++/lib/options/AlignmentFileOptions.hh
@@ -0,0 +1,33 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+
+/// alignment (BAM/CRAM) input file options shared by all programs which require these as input
+struct AlignmentFileOptions
+{
+    std::vector<std::string> alignmentFilename; ///< alignment file paths; parseOptions() stores non-tumor files first, then tumor files
+    std::vector<bool> isAlignmentTumor; ///< indicates which positions in alignmentFilename correspond to tumor
+};
diff --git a/src/c++/lib/options/AlignmentFileOptionsParser.cpp b/src/c++/lib/options/AlignmentFileOptionsParser.cpp
new file mode 100644
index 0000000..4c0f696
--- /dev/null
+++ b/src/c++/lib/options/AlignmentFileOptionsParser.cpp
@@ -0,0 +1,102 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "options/optionsUtil.hh"
+#include "options/AlignmentFileOptionsParser.hh"
+
+#include <set>
+
+
+typedef std::vector<std::string> files_t;
+
+
+
+boost::program_options::options_description
+getOptionsDescription(AlignmentFileOptions& /*opt*/)
+{
+    // no storage is bound to these options: parseOptions() reads both file lists from the variables_map
+    namespace po = boost::program_options;
+    po::options_description alignOptions("alignment-files");
+    alignOptions.add_options()
+    ("align-file", po::value<files_t>(),
+     "alignment file in BAM or CRAM format (may be specified multiple times, assumed to be non-tumor if tumor file(s) provided)")
+    ("tumor-align-file", po::value<files_t>(),
+     "tumor sample alignment file in BAM or CRAM format (may be specified multiple times)")
+    ;
+    return alignOptions;
+}
+
+
+bool
+parseOptions(
+    const boost::program_options::variables_map& vm,
+    AlignmentFileOptions& opt,
+    std::string& errorMsg)
+{
+    // gather the file list for each sample type; a list stays empty when its option was not given:
+    files_t normalFiles;
+    files_t tumorFiles;
+    if (vm.count("align-file"))
+    {
+        normalFiles = boost::any_cast<files_t>(vm["align-file"].value());
+    }
+    if (vm.count("tumor-align-file"))
+    {
+        tumorFiles = boost::any_cast<files_t>(vm["tumor-align-file"].value());
+    }
+
+    // non-tumor files are stored first, followed by the tumor files:
+    opt.alignmentFilename = normalFiles;
+    opt.alignmentFilename.insert(opt.alignmentFilename.end(),
+                                 tumorFiles.begin(),
+                                 tumorFiles.end());
+
+    // matching tumor flags: false for each non-tumor file, then true for each tumor file
+    opt.isAlignmentTumor.assign(normalFiles.size(), false);
+    opt.isAlignmentTumor.resize(opt.alignmentFilename.size(), true);
+
+    errorMsg.clear();
+    if (opt.alignmentFilename.empty())
+    {
+        errorMsg="Must specify at least one input alignment file";
+        return true;
+    }
+
+    // check that alignment files exist, and names do not repeat
+    std::set<std::string> seenNames;
+    for (std::string& alignFile : opt.alignmentFilename)
+    {
+        // NOTE(review): a true return is treated as failure, presumably with errorMsg already set -- confirm
+        if (checkStandardizeInputFile(alignFile,"alignment file",errorMsg)) break;
+        // insertion fails when the same name has already been seen
+        if (! seenNames.insert(alignFile).second)
+        {
+            errorMsg = "Repeated alignment filename: " + alignFile + "\n";
+            break;
+        }
+    }
+
+    // true indicates failure, i.e. that an error message was recorded
+    return (! errorMsg.empty());
+}
diff --git a/src/c++/lib/options/AlignmentFileOptionsParser.hh b/src/c++/lib/options/AlignmentFileOptionsParser.hh
new file mode 100644
index 0000000..4499f55
--- /dev/null
+++ b/src/c++/lib/options/AlignmentFileOptionsParser.hh
@@ -0,0 +1,43 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "options/AlignmentFileOptions.hh"
+
+#include "boost/program_options.hpp"
+
+#include <string>
+
+
+boost::program_options::options_description
+getOptionsDescription(
+    AlignmentFileOptions& opt);
+
+
+bool
+parseOptions(
+    const boost::program_options::variables_map& vm,
+    AlignmentFileOptions& opt,
+    std::string& errorMsg);
diff --git a/src/c++/lib/options/CMakeLists.txt b/src/c++/lib/options/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/options/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/options/CallOptionsDiploid.cpp b/src/c++/lib/options/CallOptionsDiploid.cpp
new file mode 100644
index 0000000..2962ae4
--- /dev/null
+++ b/src/c++/lib/options/CallOptionsDiploid.cpp
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "options/CallOptionsDiploid.hh"
+
+
+boost::program_options::options_description
+getOptionsDescription(CallOptionsDiploid& opt)
+{
+    namespace po = boost::program_options;
+    // every option stores into the matching member of opt, whose current value serves as the default
+    po::options_description diploidOptions("germline-variant-calling");
+    diploidOptions.add_options()
+    ("diploid-max-depth-factor", po::value(&opt.maxDepthFactor)->default_value(opt.maxDepthFactor),
+     "Variants where the depth around the breakpoint is greater than this factor x the chromosomal mean will be filtered out")
+    ("min-qual-score", po::value(&opt.minOutputAltScore)->default_value(opt.minOutputAltScore),
+     "minimum QUAL score for variants included in the germline output vcf")
+    ("min-pass-qual-score", po::value(&opt.minPassAltScore)->default_value(opt.minPassAltScore),
+     "minimum QUAL score for variants to PASS in germline output vcf")
+    ("min-pass-gt-score", po::value(&opt.minPassGTScore)->default_value(opt.minPassGTScore),
+     "minimum genotype quality score below which samples are filtered for a variant in the germline output vcf")
+    ;
+    return diploidOptions;
+}
+
diff --git a/src/c++/lib/options/CallOptionsDiploid.hh b/src/c++/lib/options/CallOptionsDiploid.hh
new file mode 100644
index 0000000..15b2f25
--- /dev/null
+++ b/src/c++/lib/options/CallOptionsDiploid.hh
@@ -0,0 +1,61 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "boost/program_options.hpp"
+
+
+struct CallOptionsDiploid
+{
+    float indelPrior = 1e-5f; // NOTE(review): presumably the prior probability of an indel -- confirm at the point of use
+
+    /// breakpoints where the non-tumor depth is greater than the chromosome average x this factor are filtered out:
+    float maxDepthFactor = 3.0f; // bound to the "diploid-max-depth-factor" option
+    std::string maxDepthFilterLabel = "MaxDepth";
+
+    /// minimum QUAL score to print out a diploid variant
+    unsigned minOutputAltScore = 10; // bound to the "min-qual-score" option
+
+    /// minimum QUAL score to PASS a diploid variant
+    unsigned minPassAltScore = 20; // bound to the "min-pass-qual-score" option
+    std::string minAltFilterLabel = "MinQUAL";
+
+    /// below this GQ value, the SAMPLE filter is marked in the VCF
+    unsigned minPassGTScore = 10; // bound to the "min-pass-gt-score" option
+    std::string minGTFilterLabel = "MinGQ";
+
+    /// control filtration based on MQ0 fraction:
+    float maxMQ0Frac = 0.4f;
+    std::string maxMQ0FracLabel = "MaxMQ0Frac";
+
+    /// filter for large SVs with no pair support
+    std::string noPairSupportLabel = "NoPairSupport";
+
+    std::string rnaFilterLabel = "RNAFail"; // NOTE(review): presumably the label applied to failing RNA calls -- confirm at the point of use
+};
+
+
+boost::program_options::options_description
+getOptionsDescription(CallOptionsDiploid& opt);
diff --git a/src/c++/lib/options/CallOptionsShared.hh b/src/c++/lib/options/CallOptionsShared.hh
new file mode 100644
index 0000000..24e541d
--- /dev/null
+++ b/src/c++/lib/options/CallOptionsShared.hh
@@ -0,0 +1,38 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+
+/// options shared by multiple scoring schemes:
+///
+/// Note that in theory these could be offered once for each scoring scheme, but
+/// it would be difficult to do this efficiently because these options have an impact
+/// on early scoring likelihoods.
+///
+struct CallOptionsShared
+{
+    /// This influences alignments to the ref allele when comparing ref vs alt alignment quality
+    float snpPrior = 1e-3f;
+};
diff --git a/src/c++/lib/options/CallOptionsSomatic.cpp b/src/c++/lib/options/CallOptionsSomatic.cpp
new file mode 100644
index 0000000..11b161d
--- /dev/null
+++ b/src/c++/lib/options/CallOptionsSomatic.cpp
@@ -0,0 +1,47 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "options/CallOptionsSomatic.hh"
+
+
+boost::program_options::options_description
+getOptionsDescription(CallOptionsSomatic& opt)
+{
+    namespace po = boost::program_options;
+    // every option stores into the matching member of opt, whose current value serves as the default
+    po::options_description somaticOptions("somatic-variant-calling");
+    somaticOptions.add_options()
+    ("somatic-max-depth-factor", po::value(&opt.maxDepthFactor)->default_value(opt.maxDepthFactor),
+     "Variants where the normal-sample depth around the breakpoint is greater than this factor x the chromosomal mean will be filtered out")
+    ("min-somatic-score", po::value(&opt.minOutputSomaticScore)->default_value(opt.minOutputSomaticScore),
+     "minimum somatic quality score for variants to be included in the somatic output vcf")
+    ("min-pass-somatic-score", po::value(&opt.minPassSomaticScore)->default_value(opt.minPassSomaticScore),
+     "minimum somatic quality score below which variants are marked as filtered in the somatic output vcf")
+    /*
+        ("noise-sv-prior", po::value(&opt.noiseSVPrior)->default_value(opt.noiseSVPrior),
+         "probability of a spurious SV observation shared in the tumor and normal samples")
+    */
+    ;
+    return somaticOptions;
+}
diff --git a/src/c++/lib/options/CallOptionsSomatic.hh b/src/c++/lib/options/CallOptionsSomatic.hh
new file mode 100644
index 0000000..a8df15d
--- /dev/null
+++ b/src/c++/lib/options/CallOptionsSomatic.hh
@@ -0,0 +1,59 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "boost/program_options.hpp"
+
+
+struct CallOptionsSomatic
+{
+    float germlineSVPrior = 1e-5f;
+    float somaticSVPrior = 1e-7f;
+
+    /// small/large values below reflect our expectation that shared noise is more common for small events than for large events
+    ///
+    /// expected shared tumor-normal sample noise rates for "small" SVs, ramp is from 3k->5k for small to large.
+    float smallNoiseSVPrior = 1e-9f;
+    /// expected shared tumor-normal sample noise rates for "large" SVs
+    float largeNoiseSVPrior = 1e-10f;
+
+    /// breakpoints where the non-tumor depth is greater than the chromosome average x this factor
+    /// are filtered out:
+    float maxDepthFactor = 3.0f; // bound to the "somatic-max-depth-factor" option
+    std::string maxDepthFilterLabel = "MaxDepth";
+
+    /// minimum somatic quality to print out a somatic variant
+    unsigned minOutputSomaticScore = 10; // bound to the "min-somatic-score" option
+    /// minimum somatic quality which passes vcf filtration
+    unsigned minPassSomaticScore = 30; // bound to the "min-pass-somatic-score" option
+    std::string minSomaticScoreLabel = "MinSomaticScore";
+
+    float maxMQ0Frac = 0.4f;
+    std::string maxMQ0FracLabel = "MaxMQ0Frac";
+};
+
+
+boost::program_options::options_description
+getOptionsDescription(CallOptionsSomatic& opt);
diff --git a/src/c++/lib/options/CallOptionsTumor.cpp b/src/c++/lib/options/CallOptionsTumor.cpp
new file mode 100644
index 0000000..f3b3855
--- /dev/null
+++ b/src/c++/lib/options/CallOptionsTumor.cpp
@@ -0,0 +1,44 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#include "options/CallOptionsTumor.hh"
+
+
+boost::program_options::options_description
+getOptionsDescription(CallOptionsTumor& opt) // builds the "tumor-only-variant-calling" option group bound to 'opt'
+{
+    namespace po = boost::program_options;
+    po::options_description desc("tumor-only-variant-calling");
+    desc.add_options() // only maxDepthFactor is exposed; it is bound to 'opt' and defaults to opt's current value
+    ("tumor-max-depth-factor", po::value(&opt.maxDepthFactor)->default_value(opt.maxDepthFactor),
+     "Variants where the tumor-sample depth around the breakpoint is greater than this factor x the chromosomal mean will be filtered out")
+    ;
+
+    return desc;
+}
+
+
+
+
+
diff --git a/src/c++/lib/options/CallOptionsTumor.hh b/src/c++/lib/options/CallOptionsTumor.hh
new file mode 100644
index 0000000..5076e52
--- /dev/null
+++ b/src/c++/lib/options/CallOptionsTumor.hh
@@ -0,0 +1,59 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+///
+
+#pragma once
+
+#include "boost/program_options.hpp"
+
+
+struct CallOptionsTumor // tumor-only options; mirrors CallOptionsSomatic with the somatic-only fields commented out
+{
+    //float germlineSVPrior = 1e-5f;
+    //float somaticSVPrior = 1e-7f;
+
+    // (disabled) small/large values below reflect our expectation that shared noise is more common among small events
+    //
+    // (disabled) expected shared tumor-normal sample noise rates for "small" SVs, ramp is from 3k->5k for small to large.
+    //float smallNoiseSVPrior = 1e-9f;
+    // (disabled) expected shared tumor-normal sample noise rates for "large" SVs
+    //float largeNoiseSVPrior = 1e-10f;
+
+    /// breakpoints where the tumor depth is greater than the chromosome average x this factor
+    /// are filtered out:
+    float maxDepthFactor = 3.0f;
+    std::string maxDepthFilterLabel = "MaxDepth"; // NOTE(review): presumably the filter label reported for maxDepthFactor failures -- confirm
+
+    // (disabled) minimum somatic quality to print out a somatic variant
+    //unsigned minOutputSomaticScore = 10;
+    // (disabled) minimum somatic quality which passes vcf filtration
+    //unsigned minPassSomaticScore = 30;
+    //std::string minSomaticScoreLabel = "MinSomaticScore";
+
+    float maxMQ0Frac = 0.4f; // NOTE(review): undocumented; presumably a maximum fraction of MAPQ0 reads -- confirm
+    std::string maxMQ0FracLabel = "MaxMQ0Frac";
+};
+
+
+boost::program_options::options_description
+getOptionsDescription(CallOptionsTumor& opt);
diff --git a/src/c++/lib/options/IterativeAssemblerOptions.hh b/src/c++/lib/options/IterativeAssemblerOptions.hh
new file mode 100644
index 0000000..12b52d3
--- /dev/null
+++ b/src/c++/lib/options/IterativeAssemblerOptions.hh
@@ -0,0 +1,65 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Xiaoyu Chen
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+
+/// Input parameters for IterativeAssembler
+///
+struct IterativeAssemblerOptions
+{
+    IterativeAssemblerOptions() {} // all members take the in-class defaults below
+
+    /// the symbol set used during assembly
+    std::string alphabet = "ACGT"; // NOTE(review): this header uses std::string but does not include <string>
+
+    /// minimum basecall quality for assembly input
+    int minQval = 5;
+
+    /// initial word (kmer) length
+    unsigned minWordLength = 41;
+
+    unsigned maxWordLength = 76; // NOTE(review): presumably the largest word (kmer) length attempted -- confirm
+    unsigned wordStepSize = 5; // NOTE(review): presumably the word-length increment between attempts -- confirm
+    unsigned minContigLength = 15; // NOTE(review): presumably the shortest contig that is kept -- confirm
+
+    /// min. coverage required for contig extension
+    unsigned minCoverage = 1;
+
+    /// coverage required for conservative contig sub-range
+    unsigned minConservativeCoverage = 2;
+
+    /// max error rates allowed during contig extension
+    double maxError = 0.35;
+
+    /// min. number of unused reads to enable search for more contigs
+    unsigned minUnusedReads = 3;
+
+    /// min. number of reads required to start assembly
+    unsigned minSupportReads = 2;
+
+    /// max. number of assemblies returned for a given set of reads
+    unsigned maxAssemblyCount = 10;
+};
diff --git a/src/c++/lib/options/ReadScannerOptions.hh b/src/c++/lib/options/ReadScannerOptions.hh
new file mode 100644
index 0000000..2747542
--- /dev/null
+++ b/src/c++/lib/options/ReadScannerOptions.hh
@@ -0,0 +1,111 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+
+struct ReadScannerOptions // read-scanning thresholds; every member carries its default value
+{
+    ReadScannerOptions() {}
+
+    /// standard MAPQ filter applied during locus generation and some (but not all) subsequent steps
+    unsigned minMapq = 15;
+
+    /// a second, lower mapq threshold used only during somatic calling to disprove a
+    /// somatic candidate using weak normal sample evidence
+    unsigned minTier2Mapq = 5;
+
+    /// report breakend regions with x prob regions removed from each edge
+    float breakendEdgeTrimProb = 0.25f;
+
+    /// report breakend regions with x prob regions removed from each edge
+    /// used only for 'large-scale' events.
+    float largeScaleEventBreakendEdgeTrimProb = 0.1f;
+
+    /// report a pair as "proper pair" if fragment size is within x prob region removed from each edge
+    float properPairTrimProb = 0.01f;
+
+    /// add a pair to the evidence pool if frag size is within x prob region removed from each edge
+    float evidenceTrimProb = 0.15f;
+
+    /// fragment length to search upstream of a breakend for shadow read support
+    float shadowSearchRangeProb = 0.05f;
+
+    /// multiplier for fragment length
+    float shadowSearchRangeFactor = 1.2f;
+
+    /// ignore indels smaller than this when building graph, constructing candidates and scoring output:
+    unsigned minCandidateVariantSize = 10;
+
+    /// if minCandidateVariantSize is set higher than this value, then we ignore non-specific assembly evidence
+    /// (like soft-clip and poor alignment) during candidate generation
+    unsigned maxCandidateSizeForLocalAssmEvidence = 100;
+
+    /// whenever a breakend is predicted from a read pair junction, the predicted breakend
+    /// range should be no smaller than this:
+    unsigned minPairBreakendSize = 40;
+
+    /// whenever a breakend is predicted from an individual read split (ie. non-assembled),
+    /// set the predicted breakend size to this fraction of the
+    /// event size (modified by the min and max limits below)
+    float splitBreakendSizeFraction = 0.1f;
+
+    /// whenever a breakend is predicted from an individual read split (ie. non-assembled),
+    /// the predicted breakend range should be no larger than this:
+    unsigned maxSplitBreakendSize = 100;
+
+    /// whenever a breakend is predicted from an individual read split (ie. non-assembled),
+    /// the predicted breakend range should be no smaller than this:
+    unsigned minSplitBreakendSize = 10;
+
+    /// Semi-aligned regions (including soft-clipped) need to be at least this long to be included as SV evidence
+    ///
+    unsigned minSemiAlignedMismatchLen = 8;
+
+    /// Minimal length of a cis SV candidate in RNA data
+    unsigned minRNACisLength = 100000;
+    /// Minimal length of any SV candidate in RNA data
+    unsigned minRNALength = 1000;
+
+    /// Accept semi-aligned reads with at least this hypothesis score
+    /// (separate thresholds for graph and candidate generation; both default to 180)
+    double minSemiAlignedScoreGraph = 180.0;
+    double minSemiAlignedScoreCandidates = 180.0;
+
+    /// min MAPQ for shadow mate used to build SV adjacency graph
+    unsigned minSingletonMapqGraph = 30;
+
+    /// min MAPQ for shadow mate used for candidate assembly and scoring
+    unsigned minSingletonMapqCandidates = 15;
+
+    /// typically set true for RNA-Seq analysis, where proper-pair is used to signal intron-spanning pairs
+    bool isIgnoreAnomProperPair = false;
+
+    /// the maximum depth at which input reads are considered in graph creation/assembly, etc.
+    /// (when avg chrom depths are provided)
+    float maxDepthFactor = 12;
+
+    /// the maximum depth for a whole locus for remote read retrieval (ie. MAPQ0 chimera mates retrieved for large insertion assembly)
+    float maxDepthFactorRemoteReads = 7;
+};
diff --git a/src/c++/lib/options/ReadScannerOptionsParser.cpp b/src/c++/lib/options/ReadScannerOptionsParser.cpp
new file mode 100644
index 0000000..fb4b1a6
--- /dev/null
+++ b/src/c++/lib/options/ReadScannerOptionsParser.cpp
@@ -0,0 +1,64 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "options/ReadScannerOptionsParser.hh"
+
+
+boost::program_options::options_description
+getOptionsDescription(ReadScannerOptions& opt) // builds the "read-scanner" option group bound to 'opt'
+{
+    namespace po = boost::program_options;
+    po::options_description desc("read-scanner");
+    desc.add_options() // each option is bound to a member of 'opt'; where given, the default is opt's current value
+    ("min-candidate-sv-size", po::value(&opt.minCandidateVariantSize)->default_value(opt.minCandidateVariantSize),
+     "Indels below this size will not be discovered or reported as candidates")
+    ("min-mapq", po::value(&opt.minMapq)->default_value(opt.minMapq),
+     "Reads with MAPQ less than this value will be ignored")
+    ("edge-prob", po::value(&opt.breakendEdgeTrimProb)->default_value(opt.breakendEdgeTrimProb),
+     "Breakend range associated with each read will trimmed to expected fragment quantile range [p,(1-p)], p: edge-prob") // NOTE(review): help text typo, should read "will be trimmed"
+    ("ignore-anom-proper-pair", po::value(&opt.isIgnoreAnomProperPair)->zero_tokens(), // zero_tokens(): a bare flag that takes no argument
+     "Disregard anomalous fragment sizes if the BAM record has the proper pair bit set. "
+     "This flag is typically set for RNA-SEQ analysis where the proper-pair bit is used to indicate an intron-spanning read pair.")
+    ;
+
+    return desc;
+}
+
+
+
+bool
+parseOptions(
+    const boost::program_options::variables_map& /*vm*/, // unused: validation reads 'opt' directly
+    ReadScannerOptions& opt,
+    std::string& errorMsg)
+{
+    errorMsg.clear();
+    if ((opt.breakendEdgeTrimProb <= 0) || (opt.breakendEdgeTrimProb >= 1.0)) // edge-prob is the only value range-checked here
+    {
+        errorMsg="edge-prob argument is restricted to (0,1)";
+    }
+
+    return (! errorMsg.empty()); // true means validation failed and errorMsg holds the reason
+
+}
diff --git a/src/c++/lib/options/ReadScannerOptionsParser.hh b/src/c++/lib/options/ReadScannerOptionsParser.hh
new file mode 100644
index 0000000..ee3b74e
--- /dev/null
+++ b/src/c++/lib/options/ReadScannerOptionsParser.hh
@@ -0,0 +1,40 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "options/ReadScannerOptions.hh"
+
+#include "boost/program_options.hpp"
+
+
+boost::program_options::options_description
+getOptionsDescription(ReadScannerOptions& opt);
+
+
+bool
+parseOptions(
+    const boost::program_options::variables_map& vm,
+    ReadScannerOptions& opt,
+    std::string& errorMsg);
diff --git a/src/c++/lib/options/SVLocusSetOptions.hh b/src/c++/lib/options/SVLocusSetOptions.hh
new file mode 100644
index 0000000..5e4595b
--- /dev/null
+++ b/src/c++/lib/options/SVLocusSetOptions.hh
@@ -0,0 +1,61 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+
+/// parameters specific to SVLocusSet:
+///
+struct SVLocusSetOptions
+{
+    explicit
+    SVLocusSetOptions(
+        const unsigned initObservationWeight = 1) : // only the weight is caller-supplied; the other members get fixed defaults
+        observationWeight(initObservationWeight),
+        minMergeEdgeObservations(3),
+        maxSearchCount(500),
+        maxSearchDensity(0.5)
+    {}
+
+    unsigned
+    getMinMergeEdgeCount() const // merge threshold converted from observations to edge-count units
+    {
+        return (observationWeight*minMergeEdgeObservations);
+    }
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */) // NOTE(review): looks like a Boost.Serialization hook; if so the member order below is the archive layout -- confirm
+    {
+        ar& observationWeight;
+        ar& minMergeEdgeObservations;
+        ar& maxSearchCount;
+        ar& maxSearchDensity;
+    }
+
+    unsigned observationWeight; ///< used to translate graph edges counts to observations
+    unsigned minMergeEdgeObservations; ///< to reduce noise in the graph, we only merge once shared edges reach this number of observations
+    unsigned maxSearchCount; ///< the search for intersecting regions in the graph stops once this number is reached
+    float maxSearchDensity; ///< the search for intersecting regions in the graph stops once this many regions/base are found
+};
+
diff --git a/src/c++/lib/options/SVLocusSetOptionsParser.cpp b/src/c++/lib/options/SVLocusSetOptionsParser.cpp
new file mode 100644
index 0000000..2b29c82
--- /dev/null
+++ b/src/c++/lib/options/SVLocusSetOptionsParser.cpp
@@ -0,0 +1,58 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "options/SVLocusSetOptionsParser.hh"
+
+
+boost::program_options::options_description
+getOptionsDescription(SVLocusSetOptions& opt) // builds the "sv-locus-graph" option group bound to 'opt'
+{
+    namespace po = boost::program_options;
+    po::options_description desc("sv-locus-graph");
+    desc.add_options() // only minMergeEdgeObservations is exposed; it is bound to 'opt' and defaults to opt's current value
+    ("min-edge-observations", po::value(&opt.minMergeEdgeObservations)->default_value(opt.minMergeEdgeObservations),
+     "Minimum number of supporting observations required to retain a graph edge")
+    ;
+
+    return desc;
+}
+
+
+
+bool
+parseOptions(
+    const boost::program_options::variables_map& /*vm*/, // unused
+    SVLocusSetOptions& /*opt*/, // unused: no SVLocusSetOptions value is validated at present
+    std::string& errorMsg)
+{
+    errorMsg.clear();
+#if 0 // disabled: this check refers to breakendEdgeTrimProb, which is a ReadScannerOptions member, not an SVLocusSetOptions one
+    if ((opt.breakendEdgeTrimProb <= 0) || (opt.breakendEdgeTrimProb >= 1.0))
+    {
+        errorMsg="edge-prob argument is restricted to (0,1)";
+    }
+#endif
+    return (! errorMsg.empty()); // true would mean error; with the check disabled this always returns false
+
+}
diff --git a/src/c++/lib/options/SVLocusSetOptionsParser.hh b/src/c++/lib/options/SVLocusSetOptionsParser.hh
new file mode 100644
index 0000000..c699f15
--- /dev/null
+++ b/src/c++/lib/options/SVLocusSetOptionsParser.hh
@@ -0,0 +1,40 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "options/SVLocusSetOptions.hh"
+
+#include "boost/program_options.hpp"
+
+
+boost::program_options::options_description
+getOptionsDescription(SVLocusSetOptions& opt);
+
+
+bool
+parseOptions(
+    const boost::program_options::variables_map& vm,
+    SVLocusSetOptions& opt,
+    std::string& errorMsg);
diff --git a/src/c++/lib/options/SVRefinerOptions.hh b/src/c++/lib/options/SVRefinerOptions.hh
new file mode 100644
index 0000000..e9ade60
--- /dev/null
+++ b/src/c++/lib/options/SVRefinerOptions.hh
@@ -0,0 +1,90 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "alignment/AlignmentScores.hh"
+#include "options/IterativeAssemblerOptions.hh"
+#include "options/SmallAssemblerOptions.hh"
+
+//#define ITERATIVE_ASSEMBLER
+#ifdef ITERATIVE_ASSEMBLER
+typedef IterativeAssemblerOptions AssemblerOptions;
+#else
+typedef SmallAssemblerOptions AssemblerOptions; // selected unless ITERATIVE_ASSEMBLER is defined (the define above is commented out)
+#endif
+
+namespace SUPERTMP
+{
+static const int largeGapOpenScore(-24); // used by two SVRefinerOptions constructor initializers
+}
+
+
+/// Options for the SV refiner step
+///
+/// Note that we have two categories of options for assembly and alignment,
+/// one for small events, and one for large events
+///
+struct SVRefinerOptions
+{
+    /// match, mismatch, open score ratios taken from bwa defaults (but not extend!) :
+    ///
+    SVRefinerOptions() :
+        smallSVAlignScores(2, -8, -12, 0, -1),
+        largeSVAlignScores(2, -8, -18, -1, -1),
+        largeInsertEdgeAlignScores(2, -8, -18, -1, -1),
+        largeInsertCompleteAlignScores(2, -8,  SUPERTMP::largeGapOpenScore, 0, -1),
+        spanningAlignScores(2, -8, -12, -1, -1),
+        largeGapOpenScore(SUPERTMP::largeGapOpenScore),
+        jumpScore(-25),
+        RNAspanningAlignScores(2, -8, -19, -1, -1),
+        RNAJumpScore(-100),
+        RNAIntronOpenScore(-15),
+        RNAIntronOffEdgeScore(-1)
+    {
+        spanningAssembleOpt.minContigLength=75; // for breakend-spanning assemblies we require a larger contig than for small-variant assemblies
+        RNAspanningAssembleOpt.minContigLength = 75; // same larger contig requirement for RNA breakend-spanning assemblies
+        RNAspanningAssembleOpt.minWordLength = 31; // use a smaller initial word (kmer) length for RNA
+
+    }
+
+    /// parameters for small SV assembly/alignment:
+    AlignmentScores<int> smallSVAlignScores;
+    AlignmentScores<int> largeSVAlignScores; // large SV but at a single assembly locus
+    AlignmentScores<int> largeInsertEdgeAlignScores;
+    AlignmentScores<int> largeInsertCompleteAlignScores;
+    AssemblerOptions smallSVAssembleOpt; // left at the AssemblerOptions defaults (the constructor does not modify it)
+
+    // parameters for large SV assembly/alignment:
+    AlignmentScores<int> spanningAlignScores;
+    const int largeGapOpenScore; // NOTE(review): the const data members make this struct non-copy-assignable
+    const int jumpScore;
+    AlignmentScores<int> RNAspanningAlignScores;
+    const int RNAJumpScore;
+    const int RNAIntronOpenScore;
+    const int RNAIntronOffEdgeScore;
+    AssemblerOptions spanningAssembleOpt;
+    AssemblerOptions RNAspanningAssembleOpt;
+
+};
diff --git a/src/c++/lib/options/SmallAssemblerOptions.hh b/src/c++/lib/options/SmallAssemblerOptions.hh
new file mode 100644
index 0000000..bf708a9
--- /dev/null
+++ b/src/c++/lib/options/SmallAssemblerOptions.hh
@@ -0,0 +1,60 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Ole Schulz-Trieglaff
+///
+
+#pragma once
+
+
+/// Input parameters for SmallAssembler, a simple local de-bruijn graph assembler
+///
+struct SmallAssemblerOptions
+{
+    SmallAssemblerOptions() {} // all members take the in-class defaults below
+
+    /// the symbol set used during assembly
+    std::string alphabet = "ACGT"; // NOTE(review): this header uses std::string but does not include <string>
+
+    /// minimum basecall quality for assembly input
+    uint8_t minQval = 5; // NOTE(review): uint8_t is used but <cstdint> is not included by this header
+
+    /// initial word (kmer) length
+    unsigned minWordLength = 41;
+    unsigned maxWordLength = 76; // NOTE(review): presumably the largest word (kmer) length attempted -- confirm
+    unsigned wordStepSize = 5; // NOTE(review): presumably the word-length increment between attempts -- confirm
+    unsigned minContigLength = 15; // NOTE(review): presumably the shortest contig that is kept -- confirm
+
+    /// min. coverage required for contig extension
+    unsigned minCoverage = 1;
+
+    /// coverage required for conservative contig sub-range
+    unsigned minConservativeCoverage = 2;
+
+    /// max error rates allowed during contig extension
+    double maxError = 0.35;
+
+    /// min. number of reads required to start assembly
+    unsigned minSeedReads = 3;
+
+    /// max. number of assembly iterations for a cluster before we give up
+    unsigned maxAssemblyIterations = 10;
+};
diff --git a/src/c++/lib/options/optionsUtil.cpp b/src/c++/lib/options/optionsUtil.cpp
new file mode 100644
index 0000000..0b7ebb8
--- /dev/null
+++ b/src/c++/lib/options/optionsUtil.cpp
@@ -0,0 +1,55 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#include "optionsUtil.hh"
+
+#include "boost/filesystem.hpp"
+
+#include <sstream>
+
+
+
+bool
+checkStandardizeInputFile(
+    std::string& filename, // in/out: rewritten to an absolute path on success, left unchanged on error
+    const char* fileLabel, // human-readable file description inserted into error messages
+    std::string& errorMsg) // out: empty on success, otherwise the reason for failure
+{
+    errorMsg.clear();
+
+    if (filename.empty())
+    {
+        std::ostringstream oss;
+        oss << "Must specify " << fileLabel << " file";
+        errorMsg = oss.str();
+    }
+    else if (! boost::filesystem::exists(filename))
+    {
+        std::ostringstream oss;
+        oss << "Can't find " << fileLabel << " file '" << filename << "'";
+        errorMsg = oss.str();
+    }
+    else
+    {
+        filename = boost::filesystem::absolute(filename).string(); // absolute(), not canonical()
+    }
+
+    return (! errorMsg.empty()); // true means an error occurred
+}
diff --git a/src/c++/lib/options/optionsUtil.hh b/src/c++/lib/options/optionsUtil.hh
new file mode 100644
index 0000000..39a975c
--- /dev/null
+++ b/src/c++/lib/options/optionsUtil.hh
@@ -0,0 +1,35 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#pragma once
+
+#include <string>
+
+
+/// check that the input file name is non-empty and that the file exists;
+/// if so, replace the name with its absolute path
+///
+/// In case of error return true and provide the error
+/// message in errorMsg
+bool
+checkStandardizeInputFile(
+    std::string& filename,
+    const char* fileLabel,
+    std::string& errorMsg);
diff --git a/src/c++/lib/svgraph/CMakeLists.txt b/src/c++/lib/svgraph/CMakeLists.txt
new file mode 100644
index 0000000..977b8e2
--- /dev/null
+++ b/src/c++/lib/svgraph/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+include(${THIS_CXX_LIBRARY_CMAKE})
diff --git a/src/c++/lib/svgraph/EdgeInfo.cpp b/src/c++/lib/svgraph/EdgeInfo.cpp
new file mode 100644
index 0000000..255a485
--- /dev/null
+++ b/src/c++/lib/svgraph/EdgeInfo.cpp
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "EdgeInfo.hh"
+
+#include <iostream>
+
+
+
+void
+EdgeInfo::
+write(std::ostream& os) const // emits "locusIndex:nodeIndex1:nodeIndex2" with no trailing newline
+{
+    static const char sep(':');
+    os << locusIndex << sep << nodeIndex1 << sep << nodeIndex2;
+}
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const EdgeInfo& ei) // labelled one-line form of EdgeInfo::write()
+{
+    os << "edgeinfo locus:node1:node2: ";
+    ei.write(os);
+    os << '\n'; // unlike write(), the stream operator terminates the line
+    return os;
+}
diff --git a/src/c++/lib/svgraph/EdgeInfo.hh b/src/c++/lib/svgraph/EdgeInfo.hh
new file mode 100644
index 0000000..172733f
--- /dev/null
+++ b/src/c++/lib/svgraph/EdgeInfo.hh
@@ -0,0 +1,50 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/SVLocus.hh"
+
+#include <iosfwd>
+
+
+/// \brief identifies one edge by the index of its locus plus the indices of its two nodes
+///
+/// All three indices default to zero.
+///
+struct EdgeInfo
+{
+    /// minimal ascii representation:
+    void
+    write(std::ostream& os) const;
+
+    /// \return true if both node indices of this edge are equal
+    bool
+    isSelfEdge() const
+    {
+        return (nodeIndex1 == nodeIndex2);
+    }
+
+    /// index of the locus containing this edge
+    LocusIndexType locusIndex = 0;
+
+    /// indices of the two nodes joined by this edge
+    NodeIndexType nodeIndex1 = 0;
+    NodeIndexType nodeIndex2 = 0;
+};
+
+/// labeled stream output of an EdgeInfo (see EdgeInfo::write() for the unlabeled minimal form)
+std::ostream&
+operator<<(std::ostream& os, const EdgeInfo& ei);
diff --git a/src/c++/lib/svgraph/EdgeInfoUtil.cpp b/src/c++/lib/svgraph/EdgeInfoUtil.cpp
new file mode 100644
index 0000000..51f836c
--- /dev/null
+++ b/src/c++/lib/svgraph/EdgeInfoUtil.cpp
@@ -0,0 +1,65 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "svgraph/EdgeInfoUtil.hh"
+
+
+
+/// Test a self-edge for isolation from the rest of its locus.
+///
+/// Anything other than a self-edge returns false. For a self-edge, the other edges of the
+/// node are tallied together with how many of them pass isBidirectionalEdge(); the edge is
+/// reported as isolated unless there are 1-2 bidirectional edges among at most 4 other edges.
+///
+bool
+testIsolatedEdge(
+    const SVLocusSet& cset,
+    const EdgeInfo& edge)
+{
+    if (! edge.isSelfEdge()) return false;
+
+    const SVLocus& locus(cset.getLocus(edge.locusIndex));
+    const SVLocusNode& selfNode(locus.getNode(edge.nodeIndex1));
+    const SVLocusEdgeManager selfNodeEdges(selfNode.getEdgeManager());
+
+    // probe each non-self edge extending from this node:
+    EdgeInfo probeEdge(edge);
+    unsigned otherEdgeTotal(0);
+    unsigned bidirectionalTotal(0);
+    for (const SVLocusEdgesType::value_type& edgeEntry : selfNodeEdges.getMap())
+    {
+        if (edgeEntry.first == edge.nodeIndex1) continue;
+
+        probeEdge.nodeIndex2 = edgeEntry.first;
+        ++otherEdgeTotal;
+        if (isBidirectionalEdge(cset, probeEdge)) ++bidirectionalTotal;
+    }
+
+    // not isolated only when a small number of bidirectional edges occur among few total edges:
+    return ((bidirectionalTotal < 1) ||
+            (bidirectionalTotal > 2) ||
+            (otherEdgeTotal > 4));
+}
diff --git a/src/c++/lib/svgraph/EdgeInfoUtil.hh b/src/c++/lib/svgraph/EdgeInfoUtil.hh
new file mode 100644
index 0000000..cdcc500
--- /dev/null
+++ b/src/c++/lib/svgraph/EdgeInfoUtil.hh
@@ -0,0 +1,50 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/EdgeInfo.hh"
+#include "svgraph/SVLocusSet.hh"
+
+
+/// test whether the evidence count of an edge reaches the set's minimum merge edge count
+/// in both directions (node1->node2 and node2->node1)
+inline
+bool
+isBidirectionalEdge(
+    const SVLocusSet& cset,
+    const EdgeInfo& edge)
+{
+    const unsigned countThreshold(cset.getMinMergeEdgeCount());
+    const SVLocus& locus(cset.getLocus(edge.locusIndex));
+
+    // the reverse direction is only queried once the forward direction has passed:
+    if (locus.getEdge(edge.nodeIndex1,edge.nodeIndex2).getCount() < countThreshold) return false;
+    return (locus.getEdge(edge.nodeIndex2,edge.nodeIndex1).getCount() >= countThreshold);
+}
+
+
+/// determine if this is a self-edge of a node with no other (bidirectional-pass) edges:
+///
+/// \return false whenever \p edge is not a self-edge. For a self-edge, the implementation in
+///         EdgeInfoUtil.cpp counts the node's other edges and how many of them pass
+///         isBidirectionalEdge(), and returns false only when 1-2 bidirectional edges are
+///         found among at most 4 other edges; every other case returns true.
+///
+bool
+testIsolatedEdge(
+    const SVLocusSet& cset,
+    const EdgeInfo& edge);
diff --git a/src/c++/lib/svgraph/GenomeInterval.cpp b/src/c++/lib/svgraph/GenomeInterval.cpp
new file mode 100644
index 0000000..5c71f6f
--- /dev/null
+++ b/src/c++/lib/svgraph/GenomeInterval.cpp
@@ -0,0 +1,36 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "svgraph/GenomeInterval.hh"
+
+#include <iostream>
+
+
+
+/// labeled output in the form "GenomeInterval: tid:range" (no trailing newline)
+std::ostream&
+operator<<(std::ostream& os, const GenomeInterval& gi)
+{
+    os << "GenomeInterval: ";
+    os << gi.tid << ":" << gi.range;
+    return os;
+}
diff --git a/src/c++/lib/svgraph/GenomeInterval.hh b/src/c++/lib/svgraph/GenomeInterval.hh
new file mode 100644
index 0000000..26fdb60
--- /dev/null
+++ b/src/c++/lib/svgraph/GenomeInterval.hh
@@ -0,0 +1,91 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/known_pos_range2.hh"
+
+#include <iosfwd>
+
+
+/// single chromosome range
+///
+/// all internal locations use a chromosome index number
+struct GenomeInterval
+{
+    GenomeInterval(
+        const int32_t initTid = 0,
+        const pos_t beginPos = 0,
+        const pos_t endPos = 0)
+        : tid(initTid)
+        , range(beginPos,endPos)
+    {}
+
+    /// \return true if gi has the same chromosome index and its range intersects this range
+    bool
+    isIntersect(const GenomeInterval& gi) const
+    {
+        return ((tid == gi.tid) && range.is_range_intersect(gi.range));
+    }
+
+    /// order by chromosome index first, then by range
+    bool
+    operator<(const GenomeInterval& rhs) const
+    {
+        if (tid != rhs.tid) return (tid < rhs.tid);
+        return (range < rhs.range);
+    }
+
+    /// equal only when both the chromosome index and the range match
+    bool
+    operator==(const GenomeInterval& rhs) const
+    {
+        if (tid != rhs.tid) return false;
+        return (range == rhs.range);
+    }
+
+    /// reset the chromosome index to zero and clear the range
+    void
+    clear()
+    {
+        range.clear();
+        tid = 0;
+    }
+
+    /// serialize the chromosome index followed by the range
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& tid;
+        ar& range;
+    }
+
+    int32_t tid;
+    known_pos_range2 range;
+};
+
+
+/// labeled stream output of a GenomeInterval
+std::ostream&
+operator<<(std::ostream& os, const GenomeInterval& gi);
+
+// NOTE(review): sets the boost serialization implementation level for GenomeInterval to
+// object_serializable -- presumably to skip per-class version/tracking info in archives; confirm
+// against the boost serialization documentation
+BOOST_CLASS_IMPLEMENTATION(GenomeInterval, boost::serialization::object_serializable)
diff --git a/src/c++/lib/svgraph/GenomeIntervalTracker.hh b/src/c++/lib/svgraph/GenomeIntervalTracker.hh
new file mode 100644
index 0000000..70bfd71
--- /dev/null
+++ b/src/c++/lib/svgraph/GenomeIntervalTracker.hh
@@ -0,0 +1,64 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "GenomeInterval.hh"
+#include "blt_util/RegionTracker.hh"
+
+#include <iosfwd>
+
+
+/// \brief accumulates genome intervals in one RegionTracker per chromosome index
+///
+struct GenomeIntervalTracker
+{
+    /// clear the tracker of every chromosome index (the per-chromosome slots themselves are kept)
+    void
+    clear()
+    {
+        typedef std::vector<RegionTracker>::iterator tracker_iter_t;
+        for (tracker_iter_t trackerIter(_regions.begin()); trackerIter != _regions.end(); ++trackerIter)
+        {
+            trackerIter->clear();
+        }
+    }
+
+    /// add the range of gi to the tracker of its chromosome index, growing the
+    /// per-chromosome storage when this index has not been seen before
+    void
+    addInterval(
+        const GenomeInterval& gi)
+    {
+        assert(gi.tid >= 0);
+        const bool isNewChromIndex(static_cast<unsigned>(gi.tid) >= _regions.size());
+        if (isNewChromIndex)
+        {
+            _regions.resize(gi.tid+1);
+        }
+        _regions[gi.tid].addRegion(gi.range);
+    }
+
+    /// \return false if nothing is stored for the chromosome index of gi, otherwise the result of
+    ///         RegionTracker::isSubsetOfRegion() for the range of gi
+    bool
+    isSubsetOfRegion(
+        const GenomeInterval& gi) const
+    {
+        assert(gi.tid >= 0);
+        const bool isKnownChromIndex(static_cast<unsigned>(gi.tid) < _regions.size());
+        return (isKnownChromIndex && _regions[gi.tid].isSubsetOfRegion(gi.range));
+    }
+
+private:
+    std::vector<RegionTracker> _regions;
+};
diff --git a/src/c++/lib/svgraph/GenomeIntervalUtil.cpp b/src/c++/lib/svgraph/GenomeIntervalUtil.cpp
new file mode 100644
index 0000000..df03ef4
--- /dev/null
+++ b/src/c++/lib/svgraph/GenomeIntervalUtil.cpp
@@ -0,0 +1,74 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "svgraph/GenomeIntervalUtil.hh"
+
+
+
+/// Merge all mutually intersecting intervals, replacing the input with the merged set.
+///
+/// Each not-yet-consumed input interval in turn becomes the 'head' of a new output interval. The
+/// remaining inputs are then rescanned, absorbing one intersecting interval per scan, until a full
+/// scan finds nothing left which intersects the (growing) head.
+///
+/// \param intervals [in,out] intervals to compress; replaced with the merged intervals on return
+/// \return vector with one entry per input interval, giving the index of the output interval
+///         which the input interval was merged into
+///
+std::vector<unsigned>
+intervalCompressor(
+    std::vector<GenomeInterval>& intervals)
+{
+    const unsigned count(intervals.size());
+
+    std::vector<GenomeInterval> compressed;
+    std::vector<bool> isTransferred(count,false);
+    std::vector<unsigned> indexMap(count,0);
+
+    for (unsigned headIndex(0); headIndex < count; ++headIndex)
+    {
+        if (isTransferred[headIndex]) continue;
+
+        const unsigned compressedIndex(compressed.size());
+        isTransferred[headIndex] = true;
+        indexMap[headIndex] = compressedIndex;
+        compressed.push_back(intervals[headIndex]);
+
+        // this reference stays valid because nothing else is pushed to 'compressed' until the
+        // next head interval is started:
+        GenomeInterval& headInterval(compressed.back());
+
+        // every merge can extend the head range, so the scan restarts after each merge:
+        bool isMerged(true);
+        while (isMerged)
+        {
+            isMerged = false;
+
+            for (unsigned testIndex(headIndex+1); testIndex < count; ++testIndex)
+            {
+                if (isTransferred[testIndex]) continue;
+                if (! headInterval.isIntersect(intervals[testIndex])) continue;
+
+                isTransferred[testIndex] = true;
+                indexMap[testIndex] = compressedIndex;
+                headInterval.range.merge_range(intervals[testIndex].range);
+                isMerged = true;
+                break;
+            }
+        }
+    }
+
+    // hand the result back without deep-copying it:
+    intervals.swap(compressed);
+    return indexMap;
+}
diff --git a/src/c++/lib/svgraph/GenomeIntervalUtil.hh b/src/c++/lib/svgraph/GenomeIntervalUtil.hh
new file mode 100644
index 0000000..b9f1041
--- /dev/null
+++ b/src/c++/lib/svgraph/GenomeIntervalUtil.hh
@@ -0,0 +1,39 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/GenomeInterval.hh"
+
+#include <vector>
+
+
+/// given a collection of genome intervals, reduce down to the minimum non-overlapping set:
+///
+/// \param intervals [in,out] the input intervals; overwritten with the reduced set on return, so
+///         its size may shrink
+///
+/// \returns a vector with size equal to the input vector, containing a mapping of the input
+///         interval index to the output interval index
+///
+std::vector<unsigned>
+intervalCompressor(
+    std::vector<GenomeInterval>& intervals);
diff --git a/src/c++/lib/svgraph/SVLocus.cpp b/src/c++/lib/svgraph/SVLocus.cpp
new file mode 100644
index 0000000..c18600d
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocus.cpp
@@ -0,0 +1,639 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "common/Exceptions.hh"
+#include "svgraph/SVLocus.hh"
+
+#include "boost/foreach.hpp"
+
+#include <iostream>
+#include <stack>
+
+#ifdef DEBUG_SVL
+#include "blt_util/log.hh"
+#endif
+
+
+
+/// throw a LogicException reporting an attempt to access the given node index in this locus
+void
+SVLocus::
+nodeHurl(const NodeIndexType nodePtr) const
+{
+    using namespace illumina::common;
+
+    std::ostringstream oss;
+    oss << "ERROR: Attempting to access node: " << _index << ":" << nodePtr;
+    oss << " in locus with size: " << size() << "\n";
+    BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+}
+
+
+
+/// \brief merge the content of node fromIndex into node toIndex
+///
+/// The interval of toNode is expanded to cover the interval of fromNode, its evidence range is
+/// updated, and every edge of fromNode (together with the matching return edge on the remote
+/// node) is merged onto toIndex. The edges of fromNode are cleared at the end, but fromNode
+/// itself is not erased from the graph by this function.
+///
+/// \param fromIndex index of the node being merged from (must differ from toIndex)
+/// \param toIndex index of the node receiving the merged content
+/// \param obs observer passed to notifyDelete()/notifyAdd() around the change to toNode's interval
+///
+/// Throws a LogicException if the two nodes have different chromosome ids.
+///
+void
+SVLocus::
+mergeNode(
+    const NodeIndexType fromIndex,
+    const NodeIndexType toIndex,
+    flyweight_observer_t* obs)
+{
+    using namespace illumina::common;
+
+    assert(fromIndex != toIndex);
+
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocus::mergeNode");
+    log_os << logtag << " from: " << fromIndex << " to: " << toIndex << " size: " << size() << "\n";
+#endif
+
+    SVLocusNode& fromNode(getNode(fromIndex));
+    SVLocusNode& toNode(getNode(toIndex));
+
+#ifdef DEBUG_SVL
+    log_os << logtag << " BEFORE fromNode: " << fromNode;
+    log_os << logtag << " BEFORE toNode: " << toNode;
+#endif
+
+    if (fromNode.getInterval().tid != toNode.getInterval().tid)
+    {
+        std::ostringstream oss;
+        oss << "ERROR: Attempting to merge nodes on different chromosomes\n"
+            << "\tNode1: " << fromNode
+            << "\tNode2: " << toNode;
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    // the observer is told that toNode is deleted before its interval changes, and re-added after:
+    notifyDelete(obs,toIndex);
+
+    toNode.setIntervalRange(merge_range(toNode.getInterval().range,fromNode.getInterval().range));
+
+    // evidence range: if only one of the two nodes reports isOutCount(), keep the evidence range of
+    // that node, otherwise merge both evidence ranges:
+    const bool isToCount(toNode.isOutCount());
+    const bool isFromCount(fromNode.isOutCount());
+    if     ((! isToCount) && (isFromCount))
+    {
+        toNode.setEvidenceRange(fromNode.getEvidenceRange());
+    }
+    else if ((! isFromCount) && (isToCount))
+    {
+        // pass (keep toNode value as is)
+    }
+    else
+    {
+        toNode.setEvidenceRange(merge_range(toNode.getEvidenceRange(),fromNode.getEvidenceRange()));
+    }
+
+    notifyAdd(obs,toIndex);
+
+    // now take all fromNode edges and 'redirect' them to the toNode index
+    //
+    const SVLocusEdgeManager edgeMap(fromNode.getEdgeManager());
+    for (const SVLocusEdgesType::value_type& fromNodeEdgeIter : edgeMap.getMap())
+    {
+        // alias value_type components (not required, but makes the logic easier to follow):
+        const NodeIndexType& fromNodeEdgeIndex(fromNodeEdgeIter.first);
+        const SVLocusEdge* fromNodeEdgePtr(&(fromNodeEdgeIter.second));
+
+#ifdef DEBUG_SVL
+        // is this edge between the to and from nodes?
+        const bool isToFromEdge(fromNodeEdgeIndex == toIndex);
+
+        log_os << logtag << " handle fromEdge: " << _index << ":" << fromNodeEdgeIndex << " isToFromEdge: " << isToFromEdge << "\n";
+#endif
+
+        // is this a self edge of the from node?
+        const bool isSelfFromEdge(fromNodeEdgeIndex == fromIndex);
+
+        if (isSelfFromEdge)
+        {
+            // self-edge needs to be handled as a special case:
+            toNode.mergeEdge(toIndex,*(fromNodeEdgePtr));
+            continue;
+        }
+
+        // Check for the special case when there is an edge between from and to, in this case
+        // the counts have to be handled so that counts in each region still approximate
+        // fragment support. Normally (the chimera case) -- a single fragment will create
+        // edges and nodes with weight X. If this is a non-chimera and the nodes collide and
+        // merge, we want to prevent the evidence from being doubled to 2X when it should not be.
+        //
+        // To achieve this, we take the max edge counts from the two nodes being merged instead
+        // of the sum. This is an approximate solution, but very simple to add into the
+        // graph without blowing up per-node/edge storage.
+        //
+        const bool isFromToEdge(fromNodeEdgeIndex == toIndex);
+        unsigned mergeCount(0);
+        if (isFromToEdge)
+        {
+            // edge count lookup which treats a missing edge as a count of zero:
+            auto getNodeEdgeCount = [](const SVLocusNode& node, const NodeIndexType index) -> unsigned
+            {
+                if (! node.isEdge(index)) return 0u;
+                return node.getEdge(index).getCount();
+            };
+
+            // determine what the override edge count should be:
+            const unsigned fromCount(fromNodeEdgePtr->getCount());
+            const unsigned toCount(getNodeEdgeCount(toNode,fromIndex));
+            const unsigned maxCount(std::max(fromCount,toCount));
+            mergeCount = getNodeEdgeCount(toNode,toIndex) + maxCount;
+        }
+
+        // update local edge:
+        toNode.mergeEdge(fromNodeEdgeIndex,*(fromNodeEdgePtr));
+
+        if (isFromToEdge)
+        {
+            // replace the count produced by the merge above with the max-based override computed
+            // earlier, and zero the count on the edge still pointing at fromIndex:
+            toNode.setEdgeCount(toIndex,mergeCount);
+            toNode.setEdgeCount(fromIndex,0);
+        }
+
+        // update remote inputNodeEdgeIter
+        {
+            SVLocusNode& remoteNode(getNode(fromNodeEdgeIndex));
+            try
+            {
+                // the remote edge is copied by value before it is merged onto the toIndex key of
+                // the same node:
+                const SVLocusEdge remoteEdge(remoteNode.getEdge(fromIndex));
+                remoteNode.mergeEdge(toIndex, remoteEdge);
+            }
+            catch (illumina::common::ExceptionData& e)
+            {
+                // decorate an in-flight exception:
+                std::ostringstream oss;
+                oss << "ERROR: Can't find return edge to node index: " << _index << ":" << fromIndex << " in remote node index: " << _index << ":" << fromNodeEdgeIter.first << "\n"
+                    << "\tlocal_node: " << fromNode
+                    << "\tremote_node: " << remoteNode;
+                e << illumina::common::ExceptionMsg(oss.str());
+                throw;
+            }
+        }
+    }
+
+#ifdef DEBUG_SVL
+    log_os << logtag << " AFTER toNode: " << toNode;
+#endif
+
+    // drop all edges to and from fromNode now that they have been transferred:
+    clearNodeEdges(fromIndex);
+}
+
+
+/// throw a LogicException describing a failed edge lookup between two nodes of this locus
+void
+SVLocus::
+getEdgeException(
+    const NodeIndexType fromIndex,
+    const NodeIndexType toIndex) const
+{
+    using namespace illumina::common;
+
+    std::ostringstream oss;
+    oss << "ERROR: SVLocus::getEdge() no edge exists\n"
+        << "\tfrom_node: " << _index << ":" << fromIndex << " " << getNode(fromIndex)
+        << "\tto_node: " << _index << ":" << toIndex << " " << getNode(toIndex);
+    BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+}
+
+
+
+/// \return true if none of the node's edges reaches minMergeEdgeCount in either direction
+bool
+SVLocus::
+isNoiseNode(
+    const unsigned minMergeEdgeCount,
+    const NodeIndexType nodeIndex) const
+{
+    const SVLocusNode& queryNode(getNode(nodeIndex));
+    const SVLocusEdgeManager queryEdges(queryNode.getEdgeManager());
+    for (const SVLocusEdgesType::value_type& outEdge : queryEdges.getMap())
+    {
+        // the return edge is only looked up when the out edge is below the threshold:
+        if ((outEdge.second.getCount() >= minMergeEdgeCount) ||
+            (getEdge(outEdge.first,nodeIndex).getCount() >= minMergeEdgeCount))
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+
+/// \brief zero the low-count out-edges of one node and collect the nodes left empty
+///
+/// Any non-zero out-edge count below minMergeEdgeCount is set to zero. Edges with a zero count in
+/// both directions are then erased. Nodes found to have no remaining edges and no out count are
+/// added to emptyNodes, they are not erased here.
+///
+/// \param minMergeEdgeCount non-zero out-edge counts below this value are zeroed
+/// \param nodeIndex index of the node to clean
+/// \param emptyNodes [in,out] receives the indices of nodes found to be empty
+///
+/// \return the sum of the edge counts which were zeroed
+///
+unsigned
+SVLocus::
+cleanNodeCore(
+    const unsigned minMergeEdgeCount,
+    const NodeIndexType nodeIndex,
+    std::set<NodeIndexType>& emptyNodes)
+{
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocus::cleanNodeCore");
+    log_os << logtag << " nodeAddy: " << _index << ":" << nodeIndex << "\n";
+#endif
+
+    unsigned totalCleaned(0);
+    SVLocusNode& queryNode(getNode(nodeIndex));
+
+    // edges are only recorded here and erased after the loop, so that the edge map is not
+    // modified while it is being iterated:
+    std::vector<NodeIndexType> eraseEdges;
+    const SVLocusEdgeManager edgeMap(queryNode.getEdgeManager());
+    for (const SVLocusEdgesType::value_type& edgeIter : edgeMap.getMap())
+    {
+        const SVLocusEdge* edgePtr(&(edgeIter.second));
+        if (0 != edgePtr->getCount())
+        {
+            if (edgePtr->getCount() < minMergeEdgeCount)
+            {
+                // clean criteria met -- go ahead and erase edge count:
+                totalCleaned += edgePtr->getCount();
+                queryNode.setEdgeCount(edgeIter.first,0);
+
+                // we've just snuck around the const iterator by calling the setEdgeCount function against this edge,
+                // so we have to fix this by hand:
+                edgePtr=&(queryNode.getEdge(edgeIter.first));
+            }
+        }
+
+        if (0 == edgePtr->getCount())
+        {
+            // if the out edge count is zero, see if the in-edge count is also zero --
+            // if so, erase edge
+            //
+            const SVLocusEdge& fromRemoteEdge(getEdge(edgeIter.first,nodeIndex));
+            if (0 == fromRemoteEdge.getCount())
+            {
+                eraseEdges.push_back(edgeIter.first);
+
+                // also check to see if the remote node will be empty after
+                // this edge deletion:
+                const SVLocusNode& remoteNode(getNode(edgeIter.first));
+                if ((! remoteNode.isOutCount()) &&
+                    (1 == remoteNode.size()))
+                {
+                    emptyNodes.insert(edgeIter.first);
+                }
+            }
+        }
+    }
+
+    // delete empty edges:
+    for (const NodeIndexType toIndex : eraseEdges)
+    {
+#ifdef DEBUG_SVL
+        log_os << logtag << " deleting edge: " << _index << ":" << nodeIndex << "->" << _index << ":" << toIndex << "\n";
+#endif
+        eraseEdgePair(nodeIndex,toIndex);
+    }
+
+    // if true add the target node to the erase list:
+    if ((queryNode.empty()) && (! queryNode.isOutCount()))
+    {
+        emptyNodes.insert(nodeIndex);
+    }
+
+#ifdef DEBUG_SVL
+    log_os << logtag << " emptyEdges:\n";
+    for (const NodeIndexType toIndex : eraseEdges)
+    {
+        log_os << logtag << "\tedge: " << _index << ":" << nodeIndex << "->" << _index << ":" << toIndex << "\n";
+    }
+
+    log_os << "cleanNodeCore emptyNodes\n";
+    for (const NodeIndexType nodeIndex2 : emptyNodes)
+    {
+        log_os << logtag << "\tnodeAddy: " << _index << ":" << nodeIndex2 << "\n";
+    }
+
+    log_os << logtag << " totalCleaned: " << totalCleaned << "\n";
+#endif
+
+    return totalCleaned;
+}
+
+
+
+/// clean the low-count edges of a single node, then erase any nodes which this left empty
+///
+/// \return the total edge count removed, as reported by cleanNodeCore()
+///
+unsigned
+SVLocus::
+cleanNode(
+    const unsigned minMergeEdgeCount,
+    const NodeIndexType nodeIndex,
+    flyweight_observer_t* obs)
+{
+    std::set<NodeIndexType> nodesToErase;
+    const unsigned cleanedCount(cleanNodeCore(minMergeEdgeCount,nodeIndex,nodesToErase));
+
+    eraseNodes(nodesToErase, obs);
+
+    return cleanedCount;
+}
+
+
+
+/// clean the low-count edges of every node in the locus, then erase all nodes left empty
+///
+/// \return the total edge count removed over all nodes
+///
+unsigned
+SVLocus::
+clean(
+    const unsigned minMergeEdgeCount,
+    flyweight_observer_t* obs)
+{
+    std::set<NodeIndexType> nodesToErase;
+    unsigned cleanedCount(0);
+
+    // node erasure is deferred until every node has been cleaned:
+    const unsigned nodeCount(size());
+    unsigned nodeIndex(0);
+    while (nodeIndex < nodeCount)
+    {
+        cleanedCount += cleanNodeCore(minMergeEdgeCount,nodeIndex,nodesToErase);
+        ++nodeIndex;
+    }
+
+    eraseNodes(nodesToErase, obs);
+    return cleanedCount;
+}
+
+
+
+
+/// \brief remove all edges of a node, including the return edges stored on remote nodes
+///
+/// For every non-self edge of the node, the matching return edge is erased from the remote node,
+/// then clear() is called on the node itself.
+///
+/// \param nodePtr index of the node to clear (an index, despite the name)
+///
+/// An exception raised for a missing return edge is decorated with both nodes and rethrown.
+///
+void
+SVLocus::
+clearNodeEdges(NodeIndexType nodePtr)
+{
+    using namespace illumina::common;
+
+    // defined outside of the DEBUG_SVL guard because the error message below also uses it:
+    static const std::string logtag("SVLocus::clearNodeEdges");
+
+#ifdef DEBUG_SVL
+    log_os << logtag << " from nodeIndex: " << nodePtr << "\n";
+#endif
+
+    SVLocusNode& node(getNode(nodePtr));
+    const SVLocusEdgeManager edgeMap(node.getEdgeManager());
+    for (const SVLocusEdgesType::value_type& edgeIter : edgeMap.getMap())
+    {
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " clearing remote Index: " << edgeIter.first << "\n";
+#endif
+        // skip self edge (otherwise we invalidate iterators in this foreach loop)
+        if (edgeIter.first == nodePtr) continue;
+
+        SVLocusNode& remoteNode(getNode(edgeIter.first));
+        try
+        {
+            remoteNode.eraseEdge(nodePtr);
+        }
+        catch (illumina::common::ExceptionData& e)
+        {
+            // decorate the in-flight exception with both nodes, then rethrow:
+            std::ostringstream oss;
+            oss << "ERROR: " << logtag << " no return edge on remote node.\n"
+                << "\tlocal_node: " << node
+                << "\tremote_node: " << remoteNode;
+            e << illumina::common::ExceptionMsg(oss.str());
+            throw;
+        }
+    }
+
+    // the node's own edges (including any self edge skipped above) are dropped here:
+    node.clear();
+}
+
+
+
+/// \brief erase one node from the locus graph
+///
+/// The edges of the node are cleared first. Unless the node is already the last one in the graph,
+/// the last node is copied into the vacated slot (after the edges referring to its old index have
+/// been moved to the new index), so that the graph can then be shrunk by one.
+///
+/// Note that this changes the index of the node which was last in the graph before the call.
+/// Nothing is done if nodePtr is not a valid index.
+///
+/// \param nodePtr index of the node to erase (an index, despite the name)
+/// \param obs observer notified of the node index changes
+///
+void
+SVLocus::
+eraseNode(
+    const NodeIndexType nodePtr,
+    flyweight_observer_t* obs)
+{
+    using namespace illumina::common;
+
+    if (nodePtr >= _graph.size()) return;
+
+    clearNodeEdges(nodePtr);
+
+    // index of the last node, which is moved into the erased slot:
+    NodeIndexType fromPtr(_graph.size()-1);
+
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocus::eraseNode");
+    log_os << logtag << " " << _index << ":" << nodePtr << " transfer_in: " << _index << ":" << fromPtr << " \n";
+
+    log_os << logtag << " BEFORE: " << getNode(nodePtr) << "\n";
+#endif
+
+    if (fromPtr != nodePtr)
+    {
+#ifdef DEBUG_SVL
+        log_os << logtag << " transfer_in: BEFORE: " << getNode(fromPtr) << "\n";
+#endif
+        // reassign fromNode's remote edges before shifting its address:
+        //
+        bool isHandleSelfEdge(false);
+        SVLocusNode& fromNode(getNode(fromPtr));
+        const SVLocusEdgeManager edgeMap(fromNode.getEdgeManager());
+        for (const SVLocusEdgesType::value_type& edgeIter : edgeMap.getMap())
+        {
+            const bool isSelfEdge(edgeIter.first == fromPtr);
+
+            if (isSelfEdge)
+            {
+                // have to handle this outside the foreach loop so that we
+                // don't invalidate our iterators:
+                isHandleSelfEdge=true;
+                continue;
+            }
+
+            SVLocusNode& remoteNode(getNode(edgeIter.first));
+            remoteNode.moveEdge(fromPtr,nodePtr);
+        }
+
+        if (isHandleSelfEdge)
+        {
+            fromNode.moveEdge(fromPtr,nodePtr);
+        }
+
+        // the observer sees the erased node leave its slot, then the moved node enter the same slot:
+        notifyDelete(obs,nodePtr);
+        _graph[nodePtr] = _graph[fromPtr];
+        notifyAdd(obs,nodePtr);
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " transfer_in: AFTER: " << getNode(nodePtr) << "\n";
+#endif
+    }
+
+    // drop the final slot, which at this point holds either the erased node or the moved-from copy:
+    notifyDelete(obs,fromPtr);
+    _graph.resize(fromPtr);
+}
+
+
+
+/// erase a set of nodes from the locus; the whole locus is cleared at once when the set
+/// covers every node
+void
+SVLocus::
+eraseNodes(
+    const std::set<NodeIndexType>& nodes,
+    flyweight_observer_t* obs)
+{
+    if (nodes.empty()) return;
+
+    // if the whole locus is being erased, this is more efficient:
+    const bool isEraseAll(size() == nodes.size());
+    if (isEraseAll)
+    {
+        clear(obs);
+        return;
+    }
+
+    // partial deletion must be done in descending order:
+    typedef std::set<NodeIndexType>::const_reverse_iterator rev_iter_t;
+    for (rev_iter_t nodeIter(nodes.rbegin()); nodeIter != nodes.rend(); ++nodeIter)
+    {
+        eraseNode(*nodeIter, obs);
+    }
+}
+
+
+
+/// \return the summed count of the return edges pointing back to this node from each node it
+///         has an edge to
+unsigned
+SVLocus::
+getNodeInCount(
+    const LocusIndexType nodeIndex) const
+{
+    const SVLocusNode& queryNode(getNode(nodeIndex));
+    const SVLocusEdgeManager queryEdges(queryNode.getEdgeManager());
+
+    unsigned inCountTotal(0);
+    for (const SVLocusEdgesType::value_type& outEdge : queryEdges.getMap())
+    {
+        const SVLocusEdge& returnEdge(getEdge(outEdge.first,nodeIndex));
+        inCountTotal += returnEdge.getCount();
+    }
+    return inCountTotal;
+}
+
+
+
+
+/// write a summary line for one node, followed by one line for each of its edges
+void
+SVLocus::
+dumpNode(
+    std::ostream& os,
+    const LocusIndexType nodeIndex) const
+{
+    const SVLocusNode& queryNode(getNode(nodeIndex));
+
+    // node summary:
+    os << "LocusNode: " << queryNode.getInterval()
+       << " n_edges: " << queryNode.size()
+       << " out_count: " << queryNode.outCount()
+       << " in_count: " << getNodeInCount(nodeIndex)
+       << " evidence: " << queryNode.getEvidenceRange()
+       << "\n";
+
+    // per-edge detail, with the count in each direction:
+    const SVLocusEdgeManager queryEdges(queryNode.getEdgeManager());
+    for (const SVLocusEdgesType::value_type& outEdge : queryEdges.getMap())
+    {
+        const NodeIndexType& remoteIndex(outEdge.first);
+        os << "\tEdgeTo: " << remoteIndex
+           << " out_count: " << outEdge.second.getCount()
+           << " in_count: " << getEdge(remoteIndex,nodeIndex).getCount() << "\n";
+    }
+}
+
+
+
+/// find every node reachable from startIndex by following edges
+///
+/// \param startIndex index of the node where the search starts
+/// \param connected [out] cleared, then filled with the index of each reachable node
+///                  (including startIndex)
+///
+void
+SVLocus::
+findConnected(
+    const NodeIndexType startIndex,
+    std::set<NodeIndexType>& connected) const
+{
+    connected.clear();
+
+    // stack-based traversal over node indices:
+    std::stack<NodeIndexType> pendingNodes;
+    pendingNodes.push(startIndex);
+
+    while (! pendingNodes.empty())
+    {
+        const NodeIndexType currentIndex(pendingNodes.top());
+        pendingNodes.pop();
+
+        connected.insert(currentIndex);
+
+        const SVLocusNode& currentNode(getNode(currentIndex));
+        const SVLocusEdgeManager currentEdges(currentNode.getEdgeManager());
+        for (const SVLocusEdgesType::value_type& outEdge : currentEdges.getMap())
+        {
+            if (connected.count(outEdge.first)) continue;
+            pendingNodes.push(outEdge.first);
+        }
+    }
+}
+
+
+
+/// \brief merge nodes of this locus whose intervals intersect
+///
+/// Nodes are visited from the highest index downward. Each visited node is tested against the
+/// nodes of lower index (also in descending order); on the first intersection the visited node
+/// is merged into the lower-index node and then erased, and the scan moves on to the next
+/// visited node.
+///
+/// No observer is notified (a null observer is passed to mergeNode() and eraseNode()).
+///
+void
+SVLocus::
+mergeSelfOverlap()
+{
+    // NOTE(review): nodeSize is read once even though eraseNode() below shrinks the graph --
+    // presumably the descending visit order keeps both indices in range; confirm before
+    // changing the iteration order
+    const unsigned nodeSize(size());
+    for (unsigned nodeIndex(0); nodeIndex<nodeSize; ++nodeIndex)
+    {
+        for (unsigned nodeIndex2(nodeIndex+1); nodeIndex2<nodeSize; ++nodeIndex2)
+        {
+            // convert the ascending loop counters into descending node indices
+            // (revNodeIndex is always greater than revNodeIndex2):
+            const unsigned revNodeIndex(nodeSize-(nodeIndex+1));
+            const unsigned revNodeIndex2(nodeSize-(nodeIndex2+1));
+            SVLocusNode& node1(getNode(revNodeIndex));
+            SVLocusNode& node2(getNode(revNodeIndex2));
+
+            // test whether 1 and 2 intersect, if they do, merge this into a self-edge node:
+            if (! node2.getInterval().isIntersect(node1.getInterval())) continue;
+
+            static flyweight_observer_t* obs(nullptr);
+            mergeNode(revNodeIndex, revNodeIndex2, obs);
+            eraseNode(revNodeIndex, obs);
+            break;
+        }
+    }
+}
+
+
+
+/// \brief consistency check of the locus graph
+///
+/// Verifies that every edge has a return edge (getEdge() is relied on to throw when one is
+/// missing), and optionally that all nodes are reachable from node 0.
+///
+/// \param isCheckConnected if true, throw a LogicException when the locus is not fully connected
+///
+void
+SVLocus::
+checkState(const bool isCheckConnected) const
+{
+    using namespace illumina::common;
+
+    const unsigned nodeCount(size());
+    if (0 == nodeCount) return;
+
+    // check that every edge has a return path:
+    for (unsigned nodeIndex(0); nodeIndex<nodeCount; ++nodeIndex)
+    {
+        const SVLocusNode& queryNode(getNode(nodeIndex));
+        const SVLocusEdgeManager queryEdges(queryNode.getEdgeManager());
+        for (const SVLocusEdgesType::value_type& outEdge : queryEdges.getMap())
+        {
+            getEdge(outEdge.first,nodeIndex);
+        }
+    }
+
+    if (isCheckConnected)
+    {
+        // check that every node in the locus is connected:
+        std::set<NodeIndexType> reachable;
+        findConnected(0,reachable);
+
+        if (reachable.size() != nodeCount)
+        {
+            std::ostringstream oss;
+            oss << "ERROR: SVLocus contains unconnected components, LocusIndex: " << _index << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+    }
+}
+
+
+
+/// write every node of the locus via SVLocus::dumpNode(), bracketed by BEGIN/END marker lines
+std::ostream&
+operator<<(std::ostream& os, const SVLocus& locus)
+{
+    os << "LOCUS BEGIN INDEX " << locus.getIndex() << "\n";
+
+    const unsigned nodeTotal(locus.size());
+    unsigned nodeIndex(0);
+    while (nodeIndex < nodeTotal)
+    {
+        os << "NodeIndex: " << nodeIndex << " ";
+        locus.dumpNode(os,nodeIndex);
+        ++nodeIndex;
+    }
+
+    os << "LOCUS END INDEX " << locus.getIndex() << "\n";
+    return os;
+}
diff --git a/src/c++/lib/svgraph/SVLocus.hh b/src/c++/lib/svgraph/SVLocus.hh
new file mode 100644
index 0000000..d28d2a1
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocus.hh
@@ -0,0 +1,456 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/flyweight_observer.hh"
+#include "svgraph/SVLocusNode.hh"
+
+
+typedef unsigned LocusIndexType;
+
+/// move message is composed of a bool, indicating if the node is being added (true) or deleted (false) from the index,
+/// and the id of the node itself.
+///
+typedef std::pair<bool, std::pair<LocusIndexType,NodeIndexType> > SVLocusNodeMoveMessage;
+
+
+
+struct SVLocusSet;
+
+
+
+/// \brief a set of regions containing dependent SV evidence
+///
+/// An SV locus is a region set hypothetically containing the breakends of 1 to many
+/// SVs.
+///
+/// The locus is composed of a set of non-overlapping contiguous genomic regions and
+/// the links (edges) between them. Each link has an associated evidence count.
+///
+/// This class internally manages the node shared pointers in a synced data structure,
+/// there's probably a better way to do this with transform_iterator, but I've always
+/// regretted using that.
+///
+struct SVLocus : public flyweight_notifier<SVLocusNodeMoveMessage>
+{
+    typedef std::vector<SVLocusNode> graph_type;
+
+    typedef graph_type::iterator iterator;
+    typedef graph_type::const_iterator const_iterator;
+
+    friend struct SVLocusSet;
+
+    bool
+    empty() const
+    {
+        return _graph.empty();
+    }
+
+    unsigned
+    size() const
+    {
+        return _graph.size(); // number of nodes in the locus
+    }
+
+    const_iterator
+    begin() const
+    {
+        return _graph.begin();
+    }
+
+    const_iterator
+    end() const
+    {
+        return _graph.end();
+    }
+
+    LocusIndexType
+    getIndex() const
+    {
+        return _index;
+    }
+
+    const SVLocusNode&
+    getNode(const NodeIndexType nodePtr) const
+    {
+#ifdef DEBUG_SVL
+        if (nodePtr>=_graph.size()) nodeHurl(nodePtr);
+#endif
+        assert(nodePtr<_graph.size()); // nodePtr is an index into _graph, not a pointer
+        return _graph[nodePtr];
+    }
+
+    NodeIndexType
+    addNode(
+        const GenomeInterval interval,
+        flyweight_observer_t* obs = NULL)
+    {
+        assert(interval.tid >= 0);
+
+        NodeIndexType nodePtr(newGraphNode());
+        SVLocusNode& node(getNode(nodePtr));
+        node.setInterval(interval);
+        // default _evidenceRange to the breakend interval unless a better estimate is provided
+        node.setEvidenceRange(interval.range);
+        notifyAdd(obs, nodePtr); // no-op when obs is NULL
+        return nodePtr;
+    }
+
+    // link two nodes with an edge in each direction; with the default arguments an
+    // edge count is only added on from->to (the to->from edge gets toCount, default 0)
+    void
+    linkNodes(
+        const NodeIndexType fromIndex,
+        const NodeIndexType toIndex,
+        const unsigned fromCount = 1,
+        const unsigned toCount = 0)
+    {
+        SVLocusNode& fromNode(getNode(fromIndex));
+        SVLocusNode& toNode(getNode(toIndex));
+        assert(! fromNode.isEdge(toIndex)); // the two nodes must not be linked already
+        assert(! toNode.isEdge(fromIndex));
+
+        SVLocusEdge fromEdge;
+        fromEdge.setCount(fromCount);
+        SVLocusEdge toEdge;
+        toEdge.setCount(toCount);
+        fromNode.mergeEdge(toIndex,fromEdge);
+        toNode.mergeEdge(fromIndex,toEdge);
+    }
+
+    void
+    setNodeEvidence(
+        const NodeIndexType nodeIndex,
+        const known_pos_range2& evidenceRange)
+    {
+        getNode(nodeIndex).setEvidenceRange(evidenceRange);
+    }
+
+    /// find all node indices connected to startIndex
+    ///
+    /// non-recursive version
+    void
+    findConnected(
+        const NodeIndexType startIndex,
+        std::set<NodeIndexType>& connected) const;
+
+    /// the total observations in all nodes of the locus
+    unsigned
+    totalObservationCount() const
+    {
+        unsigned sum(0);
+        for (const SVLocusNode& node : *this)
+        {
+            sum += node.outCount(); // sum of the out-edge counts of this node
+        }
+        return sum;
+    }
+
+    // the total number of edges between all nodes of the locus
+    unsigned
+    totalEdgeCount() const
+    {
+        unsigned sum(0);
+        for (const SVLocusNode& node : *this)
+        {
+            sum += node.size(); // node.size() is the number of out-edges stored on the node
+        }
+        return sum;
+    }
+
+    // the total number of self edges in the locus
+    unsigned
+    selfEdgeCount() const
+    {
+        unsigned sum(0);
+        const unsigned nodeSize(size());
+        for (unsigned nodeIndex(0); nodeIndex<nodeSize; ++nodeIndex)
+        {
+            if (getNode(nodeIndex).isEdge(nodeIndex)) sum++; // a self edge points back to its own node index
+        }
+        return sum;
+    }
+
+    /// add one observation per node to the edgeCount histogram, binned by the node's edge count; the last bin also takes all larger counts
+    void
+    getNodeEdgeCountDistro(std::vector<unsigned>& edgeCount) const
+    {
+        if (edgeCount.empty()) return; // no bins to fill
+        const unsigned maxEdge(edgeCount.size()-1);
+        for (const SVLocusNode& node : *this)
+        {
+            edgeCount[std::min(node.size(),maxEdge)]++;
+        }
+    }
+
+    /// add one observation per node to the obsCount histogram, binned by the node's out count; the last bin also takes all larger counts
+    void
+    getNodeObsCountDistro(std::vector<unsigned>& obsCount) const
+    {
+        if (obsCount.empty()) return; // no bins to fill
+        const unsigned maxObs(obsCount.size()-1);
+        for (const SVLocusNode& node : *this)
+        {
+            obsCount[std::min(node.outCount(),maxObs)]++;
+        }
+    }
+
+    /// return from->to edge, any failure of the node-level lookup is re-reported via getEdgeException()
+    const SVLocusEdge&
+    getEdge(
+        const NodeIndexType fromIndex,
+        const NodeIndexType toIndex) const
+    {
+        const SVLocusNode& fromNode(getNode(fromIndex));
+        try
+        {
+            return fromNode.getEdge(toIndex);
+        }
+        catch (...)
+        {
+            // throw a richer exception message than node can produce on its own:
+            getEdgeException(fromIndex,toIndex);
+        }
+
+        // handle compiler warning for return val, this code should never run:
+        static SVLocusEdge bogusWarning;
+        return bogusWarning;
+    }
+
+    void
+    clear(
+        flyweight_observer_t* obs)
+    {
+        for (NodeIndexType i(0); i<size(); ++i)
+        {
+            notifyDelete(obs, i); // report every node as deleted before the graph is dropped
+        }
+        _graph.clear();
+    }
+
+    // find any self-overlapping nodes within the locus and merge
+    void
+    mergeSelfOverlap();
+
+    /// debug func to check that internal data-structures are in
+    /// a consistent state
+    void
+    checkState(const bool isCheckConnected = false) const;
+
+    // total the evidence count of all in-edges to this node
+    unsigned
+    getNodeInCount(const LocusIndexType nodeIndex) const; // NOTE(review): typed LocusIndexType but named as a node index; both typedefs are unsigned
+
+    void
+    clearNodeEdges(const NodeIndexType nodePtr);
+
+    // a fancier version of the SVLocusNode dumper which can
+    // report in-edge information
+    void
+    dumpNode(
+        std::ostream& os,
+        const LocusIndexType nodeIndex) const;
+
+    template<class Archive>
+    void save(Archive& ar, const unsigned /* version */) const
+    {
+        ar << _graph; // only the node graph is archived, _index is not
+    }
+
+    template<class Archive>
+    void load(Archive& ar, const unsigned /* version */)
+    {
+        clear(nullptr); // drop the current nodes without notifying any observer
+        ar >> _graph;
+    }
+
+    BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+private:
+
+    SVLocusNode&
+    getNode(const NodeIndexType nodePtr)
+    {
+#ifdef DEBUG_SVL
+        if (nodePtr>=_graph.size()) nodeHurl(nodePtr);
+#endif
+        assert(nodePtr<_graph.size()); // mutable twin of the public const getNode()
+        return _graph[nodePtr];
+    }
+
+    void
+    nodeHurl(const NodeIndexType nodePtr) const;
+
+    void
+    updateIndex(const LocusIndexType& index)
+    {
+        _index=index;
+    }
+
+    // return true if node contains no out or in edges greater than
+    // minMergeEdgeCount
+    bool
+    isNoiseNode(
+        const unsigned minMergeEdgeCount,
+        const NodeIndexType nodeIndex) const;
+
+    /// remove all unmerged noise in-edges of node and
+    /// provide list of nodes which should be deleted
+    ///
+    /// return amount of evidence cleaned
+    unsigned
+    cleanNodeCore(
+        const unsigned minMergeEdgeCount,
+        const NodeIndexType nodeIndex,
+        std::set<NodeIndexType>& emptyNodes);
+
+    /// remove all unmerged noise in-edges of node and possibly
+    /// delete empty nodes
+    ///
+    /// return amount of evidence cleaned
+    unsigned
+    cleanNode(
+        const unsigned minMergeEdgeCount,
+        const NodeIndexType nodeIndex,
+        flyweight_observer_t* obs);
+
+    /// remove all unmerged noise edges and nodes
+    ///
+    /// return amount of evidence cleaned
+    unsigned
+    clean(
+        const unsigned minMergeEdgeCount,
+        flyweight_observer_t* obs);
+
+    void
+    getEdgeException(
+        const NodeIndexType fromIndex,
+        const NodeIndexType toIndex) const;
+
+    /// erase edges in both directions:
+    void
+    eraseEdgePair(
+        const NodeIndexType index1,
+        const NodeIndexType index2)
+    {
+        eraseEdge(index1,index2);
+        if (index1 == index2) return; // a self edge is stored only once
+        eraseEdge(index2,index1);
+    }
+
+    /// erase edge in one direction
+    void
+    eraseEdge(
+        const NodeIndexType fromIndex,
+        const NodeIndexType toIndex)
+    {
+        getNode(fromIndex).eraseEdge(toIndex);
+    }
+
+    /// copy fromLocus into this locus (this should be an intermediate part of a locus merge)
+    void
+    copyLocus(
+        const SVLocus& fromLocus,
+        flyweight_observer_t* obs)
+    {
+        assert(&fromLocus != this);
+
+        // simple method -- copy everything in with an offset in all index numbers:
+        const unsigned offset(_graph.size());
+        for (const SVLocusNode& fromNode : fromLocus)
+        {
+            const NodeIndexType nodeIndex(newGraphNode());
+            getNode(nodeIndex) = SVLocusNode(fromNode, offset); // offsetting copy shifts every edge target by offset
+            notifyAdd(obs, nodeIndex);
+        }
+    }
+
+    /// join from node into to node
+    ///
+    /// from node is effectively destroyed,
+    /// because all incoming edges will be updated
+    ///
+    void
+    mergeNode(
+        const NodeIndexType fromIndex,
+        const NodeIndexType toIndex,
+        flyweight_observer_t* obs);
+
+    // remove node
+    //
+    void
+    eraseNode(
+        const NodeIndexType nodePtr,
+        flyweight_observer_t* obs);
+
+    // remove a list of node ids
+    void
+    eraseNodes(
+        const std::set<NodeIndexType>& nodes,
+        flyweight_observer_t* obs);
+
+    NodeIndexType
+    newGraphNode()
+    {
+        static const unsigned maxIndex(std::numeric_limits<NodeIndexType>::max());
+        unsigned index(_graph.size());
+        assert(index<maxIndex);
+        _graph.resize(index+1); // append one default-constructed node
+        return static_cast<NodeIndexType>(index);
+    }
+
+    void
+    notifyAdd(
+        flyweight_observer_t* obs,
+        const NodeIndexType nodePtr)
+    {
+        if (NULL == obs) return; // notification is optional
+#ifdef DEBUG_SVL
+        log_os << "SVLocusNotifier: Add node: " << _index << ":" << nodePtr << "\n";
+#endif
+        notify_flyweight_observer(obs, std::make_pair(true,std::make_pair(_index, nodePtr))); // true marks an add message
+    }
+
+    void
+    notifyDelete(
+        flyweight_observer_t* obs,
+        const NodeIndexType nodePtr)
+    {
+        if (NULL == obs) return; // notification is optional
+#ifdef DEBUG_SVL
+        log_os << "SVLocusNotifier: Delete node: " << _index << ":" << nodePtr << "\n";
+#endif
+        notify_flyweight_observer(obs, std::make_pair(false,std::make_pair(_index, nodePtr))); // false marks a delete message
+    }
+
+    graph_type _graph;
+    LocusIndexType _index = 0;
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVLocus& locus);
+
+BOOST_CLASS_IMPLEMENTATION(SVLocus, boost::serialization::object_serializable)
diff --git a/src/c++/lib/svgraph/SVLocusNode.cpp b/src/c++/lib/svgraph/SVLocusNode.cpp
new file mode 100644
index 0000000..29fc5d4
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocusNode.cpp
@@ -0,0 +1,80 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "common/Exceptions.hh"
+#include "svgraph/SVLocusNode.hh"
+
+#include <iostream>
+#include <sstream>
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVLocusEdge& edge)
+{
+    // single-line summary of the edge, written without a trailing newline
+    return (os << "Edgecount: " << edge.getCount() << " isCountExact?: " << edge.isCountExact());
+}
+
+
+
+const SVLocusEdgesType SVLocusEdgeManager::staticMap;
+
+
+
+void
+SVLocusNode::
+getEdgeException(
+    const NodeIndexType toIndex,
+    const char* label) const
+{
+    using namespace illumina::common;
+    // always throws: the message names the calling method (label), this node and the missing target
+    std::ostringstream msg;
+    msg << "ERROR: SVLocusNode::" << label << "() no edge exists\n"
+        << "\tfrom node: " << (*this) << "\n"
+        << "\tto_node index: " << toIndex << "\n";
+    BOOST_THROW_EXCEPTION(LogicException(msg.str()));
+}
+
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVLocusNode& node)
+{
+    // header line summarizing the node:
+    os << "LocusNode: " << node.getInterval();
+    os << " n_edges: " << node.size();
+    os << " out_count: " << node.outCount();
+    os << " evidence: " << node.getEvidenceRange() << "\n";
+
+    // followed by one tab-indented line per out-edge:
+    const SVLocusEdgeManager edgeManager(node.getEdgeManager());
+    for (const SVLocusEdgesType::value_type& outEdge : edgeManager.getMap())
+    {
+        os << "\tEdgeTo: " << outEdge.first;
+        os << " out_count: " << outEdge.second.getCount() << "\n";
+    }
+    return os;
+}
+
diff --git a/src/c++/lib/svgraph/SVLocusNode.hh b/src/c++/lib/svgraph/SVLocusNode.hh
new file mode 100644
index 0000000..17a4511
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocusNode.hh
@@ -0,0 +1,663 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/GenomeInterval.hh"
+
+#include "blt_util/thirdparty_push.h"
+
+#include "boost/serialization/map.hpp"
+#include "boost/serialization/vector.hpp"
+#include "boost/serialization/split_member.hpp"
+
+#include "blt_util/thirdparty_pop.h"
+
+#include <iosfwd>
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+
+//#define DEBUG_SVL
+
+
+#ifdef DEBUG_SVL
+#include "blt_util/log.hh"
+
+#include <iostream>
+#endif
+
+
+struct SVLocusNode;
+
+
+// evidence count attached to one directed edge; no constructor so that this can be used in a union:
+struct SVLocusEdge
+{
+    unsigned
+    getCount() const
+    {
+        return _count;
+    }
+
+    bool
+    isCountExact() const
+    {
+        return (getCount() != maxCount()); // a count pinned at maxCount() may have been clipped
+    }
+
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar& _count;
+    }
+
+    void
+    setCount(const unsigned count)
+    {
+        clearCount();
+        addCount(count);
+    }
+
+private:
+    typedef unsigned count_t;
+
+    friend struct SVLocusNode;
+
+    // merge edge into this one by adding its count to ours,
+    // the sum saturates at maxCount() (see addCount)
+    void
+    mergeEdge(const SVLocusEdge& edge)
+    {
+        addCount(edge.getCount());
+    }
+
+    void
+    addCount(const unsigned increment)
+    {
+        // saturating add: compare increment against the remaining headroom, the
+        // unsigned sum (getCount()+increment) wraps on overflow and never exceeds maxCount()
+        if (increment > (maxCount()-getCount()))
+        {
+            _count = maxCount();
+            return;
+        }
+        _count += increment;
+    }
+
+    void
+    clearCount()
+    {
+        _count = 0;
+    }
+
+    static
+    unsigned
+    maxCount()
+    {
+        return std::numeric_limits<count_t>::max();
+    }
+
+    count_t _count;
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVLocusEdge& edge);
+
+BOOST_CLASS_IMPLEMENTATION(SVLocusEdge, boost::serialization::object_serializable)
+
+
+typedef unsigned NodeIndexType;
+
+
+/// TODO: get SVLocusNode to switch between real and fake maps transparently using some fancy iterator:
+///
+#if 0
+class customConstEdgeIterator
+    : public boost::iterator_adaptor<
+    customConstEdgeIterator            // Derived
+    , Finite_vertices_iterator      // Base
+    , Vertex_handle                 // Value
+    , boost::forward_traversal_tag  // Traversal type
+    , Vertex_handle>                // Reference
+{
+private:
+    struct enabler {};
+
+public:
+    my_vertex_iterator()
+        : my_vertex_iterator::iterator_adaptor_(0) {}
+
+    explicit my_vertex_iterator(const Finite_vertices_iterator p)
+        : my_vertex_iterator::iterator_adaptor_(p) {}
+
+private:
+    friend class boost::iterator_core_access;
+    typename my_vertex_iterator::reference
+    dereference() const
+    {
+        return this->base();
+    }
+};
+#endif
+
+
+typedef std::map<NodeIndexType,SVLocusEdge> SVLocusEdgesType;
+
+
+/// used for an alternate compact representation of a node with zero or one edges
+struct SVLocusEdgeSingle
+{
+    template<class Archive>
+    void serialize(Archive& ar,const unsigned /* version */)
+    {
+        ar& index& edge& isZero; // all three fields are archived, in this order
+    }
+
+
+    NodeIndexType index; // target node index of the stored edge, only meaningful when isZero is false
+    SVLocusEdge edge; // the stored edge itself, only meaningful when isZero is false
+    bool isZero; // true when no edge is stored
+};
+
+
+/// Read-only view presenting either edge storage form of a node (single slot or full map) as a map
+/// TODO: hide the union behind an actual iterator class
+struct SVLocusEdgeManager
+{
+    SVLocusEdgeManager(const SVLocusEdgeSingle& edge) :
+        mapPtr(&staticMap)
+    {
+        // an empty single slot is served by the shared, default-constructed static map:
+        if (edge.isZero) return;
+        // otherwise build a private one-entry map which this manager owns:
+        sharedMapPtr.reset(new SVLocusEdgesType);
+        sharedMapPtr->insert(std::make_pair(edge.index,edge.edge));
+        mapPtr=sharedMapPtr.get();
+    }
+
+    SVLocusEdgeManager(const SVLocusEdgesType& edgeMap) :
+        mapPtr(&edgeMap)
+    {}
+
+    const SVLocusEdgesType&
+    getMap() const
+    {
+        return *mapPtr;
+    }
+
+private:
+    const SVLocusEdgesType* mapPtr; // points at staticMap, at the caller's map, or at *sharedMapPtr
+    std::shared_ptr<SVLocusEdgesType> sharedMapPtr; // shared ownership keeps mapPtr valid in copies of the manager
+
+    static const SVLocusEdgesType staticMap;
+};
+
+
+struct SVLocusNode
+{
+    typedef SVLocusEdgesType::const_iterator const_iterator;
+
+    SVLocusNode() :
+        _isSingle(true)
+    {
+        _edges.single.isZero=true; // start with no edges; index/edge of the single slot are left unset
+    }
+
+    // specialized copy ctor which adds offset to the target index of every edge:
+    SVLocusNode(
+        const SVLocusNode& in,
+        const unsigned offset) :
+        _interval(in._interval),
+        _evidenceRange(in._evidenceRange),
+        _isSingle(in._isSingle)
+    {
+        if (_isSingle)
+        {
+            _edges.single = in._edges.single;
+            if (! _edges.single.isZero)
+            {
+                _edges.single.index += offset;
+            }
+        }
+        else
+        {
+            _edges.multiPtr = new SVLocusEdgesType;
+            for (const SVLocusEdgesType::value_type& val : in.getMap())
+            {
+                getMap().insert(std::make_pair(val.first+offset, val.second));
+            }
+        }
+    }
+
+    SVLocusNode(const SVLocusNode& rhs) :
+        _interval(rhs._interval),
+        _evidenceRange(rhs._evidenceRange),
+        _isSingle(rhs._isSingle)
+    {
+        if (_isSingle)
+        {
+            _edges.single = rhs._edges.single;
+        }
+        else
+        {
+            _edges.multiPtr = new SVLocusEdgesType; // deep copy: each node owns its own edge map
+            getMap() = rhs.getMap();
+        }
+    }
+
+    ~SVLocusNode()
+    {
+        if (! _isSingle) delete _edges.multiPtr; // the heap map exists only in the multi-edge state
+    }
+
+    SVLocusNode&
+    operator=(const SVLocusNode& rhs)
+    {
+        if (&rhs==this) return *this;
+
+        clear(); // release any owned edge map before the union is overwritten
+
+        _interval = rhs._interval;
+        _evidenceRange = rhs._evidenceRange;
+        _isSingle = rhs._isSingle;
+
+        if (_isSingle)
+        {
+            _edges.single = rhs._edges.single;
+        }
+        else
+        {
+            _edges.multiPtr = new SVLocusEdgesType;
+            getMap() = rhs.getMap();
+        }
+        return *this;
+    }
+
+
+    bool
+    empty() const
+    {
+        return (_isSingle && (_edges.single.isZero));
+    }
+
+    unsigned
+    size() const
+    {
+        if (_isSingle)
+        {
+            return (_edges.single.isZero ? 0u : 1u );
+        }
+        else
+        {
+            return getMap().size();
+        }
+    }
+
+    SVLocusEdgeManager
+    getEdgeManager() const
+    {
+        if (_isSingle)
+        {
+            return SVLocusEdgeManager(_edges.single);
+        }
+        else
+        {
+            return SVLocusEdgeManager(getMap()); // the manager refers to this node's map, it does not copy it
+        }
+    }
+
+    bool
+    isOutCount() const
+    {
+        if (empty()) return false;
+        if (_isSingle)
+        {
+            return (0 != _edges.single.edge.getCount());
+        }
+        else
+        {
+            for (const SVLocusEdgesType::value_type& edgeIter : getMap())
+            {
+                if (edgeIter.second.getCount() > 0) return true; // any edge with a non-zero count is enough
+            }
+            return false;
+        }
+    }
+
+    unsigned
+    outCount() const
+    {
+        if (empty()) return 0;
+        if (_isSingle)
+        {
+            return (_edges.single.edge.getCount());
+        }
+        else
+        {
+            unsigned sum(0);
+            for (const SVLocusEdgesType::value_type& edgeIter : getMap())
+            {
+                sum += edgeIter.second.getCount(); // plain unsigned sum over all out-edges
+            }
+            return sum;
+        }
+    }
+
+    /// return edge from this to node, throws via getEdgeException() if there is no such edge
+    const SVLocusEdge&
+    getEdge(const NodeIndexType index) const
+    {
+        if (_isSingle)
+        {
+            if (! isEdge(index))
+            {
+                getEdgeException(index, "getEdge");
+            }
+            return _edges.single.edge;
+        }
+        else
+        {
+            const_iterator i(getMap().find(index));
+            if (i == getMap().end()) getEdgeException(index, "getEdge");
+            return i->second;
+        }
+    }
+
+    /// return true if edge exists:
+    bool
+    isEdge(const NodeIndexType index) const
+    {
+        if (_isSingle)
+        {
+            return ((! _edges.single.isZero) &&
+                    (index == _edges.single.index));
+        }
+        else
+        {
+            const_iterator i(getMap().find(index));
+            return (i != getMap().end());
+        }
+    }
+
+    /// add new edge to node, or merge this edge info in if node already has edge:
+    ///
+    /// this method is responsible for merging edge counts into the node count as well
+    void
+    mergeEdge(
+        const NodeIndexType index,
+        const SVLocusEdge& edge)
+    {
+        if (_isSingle)
+        {
+            if (_edges.single.isZero)
+            {
+                _edges.single.isZero = false;
+                _edges.single.index = index;
+                _edges.single.edge = edge;
+                return;
+            }
+            else if (index == _edges.single.index)
+            {
+                _edges.single.edge.mergeEdge(edge);
+                return;
+            }
+            else
+            {
+                convertToMulti(); // a second distinct target no longer fits the single slot
+            }
+        }
+
+        assert(! _isSingle);
+
+        SVLocusEdgesType::iterator edgeIter(getMap().find(index));
+        if (edgeIter == getMap().end())
+        {
+            // this node does not already have an edge to "index", add a new edge:
+            getMap().insert(std::make_pair(index,edge));
+        }
+        else
+        {
+            // this node already has an edge to "index", merge the existing edge with the new one:
+            edgeIter->second.mergeEdge(edge);
+        }
+    }
+
+    /// set the count of the existing edge to node "index", throws via getEdgeException() if there is no such edge
+    void
+    setEdgeCount(
+        const NodeIndexType index,
+        const unsigned count)
+    {
+        if (_isSingle)
+        {
+            if (! isEdge(index))
+            {
+                getEdgeException(index, "setEdgeCount");
+            }
+            _edges.single.edge.setCount(count);
+        }
+        else
+        {
+            SVLocusEdgesType::iterator i(getMap().find(index));
+            if (i == getMap().end()) getEdgeException(index, "setEdgeCount");
+            i->second.setCount(count);
+        }
+    }
+
+    /// eliminate edge, throws via getEdgeException() if there is no such edge
+    void
+    eraseEdge(const NodeIndexType index)
+    {
+        if (_isSingle)
+        {
+            if (! isEdge(index))
+            {
+                getEdgeException(index, "eraseEdge");
+            }
+            _edges.single.isZero = true;
+        }
+        else
+        {
+            SVLocusEdgesType::iterator i(getMap().find(index));
+            if (i == getMap().end()) getEdgeException(index, "eraseEdge");
+            getMap().erase(i);
+            assert(getMap().size()>=1); // the multi-edge form is expected to hold at least 2 edges before an erase
+            if (1 == getMap().size()) convertToSingle(); // fall back to the compact form
+        }
+    }
+
+    /// unhook edge from one node id, and stick it to another:
+    void
+    moveEdge(
+        const NodeIndexType fromIndex,
+        const NodeIndexType toIndex)
+    {
+        if (_isSingle)
+        {
+            assert(isEdge(fromIndex));
+            _edges.single.index = toIndex;
+        }
+        else
+        {
+            getMap().insert(std::make_pair(toIndex,getEdge(fromIndex))); // NOTE(review): insert() is a no-op if toIndex already has an edge -- confirm callers exclude this
+            getMap().erase(fromIndex);
+        }
+    }
+
+    void
+    clear()
+    {
+        if (! _isSingle)
+        {
+            delete _edges.multiPtr;
+            _isSingle=true;
+        }
+        _edges.single.isZero = true; // only the edges are reset, interval and evidence range are kept
+    }
+
+    const GenomeInterval&
+    getInterval() const
+    {
+        return _interval;
+    }
+
+    void
+    setInterval(const GenomeInterval& interval)
+    {
+        _interval.tid=interval.tid;
+        setIntervalRange(interval.range);
+    }
+
+    void
+    setIntervalRange(const known_pos_range2& range)
+    {
+        _interval.range=range;
+    }
+
+    const known_pos_range2&
+    getEvidenceRange() const
+    {
+        return _evidenceRange;
+    }
+
+    void
+    setEvidenceRange(const known_pos_range2& range)
+    {
+        _evidenceRange = range;
+    }
+
+    template<class Archive>
+    void save(Archive& ar, const unsigned /* version */) const
+    {
+        ar << _interval << _evidenceRange << _isSingle; // _isSingle tells load() which edge form follows
+        if (_isSingle)
+        {
+            ar << _edges.single;
+        }
+        else
+        {
+            ar << getMap();
+        }
+    }
+
+    template<class Archive>
+    void load(Archive& ar, const unsigned /* version */)
+    {
+        clear(); // release any owned edge map before _isSingle is overwritten
+
+        ar >> _interval >> _evidenceRange >> _isSingle;
+        if (_isSingle)
+        {
+            ar >> _edges.single;
+        }
+        else
+        {
+            _edges.multiPtr = new SVLocusEdgesType;
+            ar >> getMap();
+        }
+    }
+
+    BOOST_SERIALIZATION_SPLIT_MEMBER()
+
+private:
+
+    union CompactEdgeType
+    {
+        SVLocusEdgeSingle single; // active member while _isSingle is true
+        SVLocusEdgesType* multiPtr; // active member while _isSingle is false, owned by the node
+    };
+
+
+    SVLocusEdgesType&
+    getMap()
+    {
+        assert(! _isSingle);
+        return *(_edges.multiPtr);
+    }
+
+    const SVLocusEdgesType&
+    getMap() const
+    {
+        assert(! _isSingle);
+        return *(_edges.multiPtr);
+    }
+
+    // given a node in the multi-edge state with one edge, convert to
+    // the single-edge state
+    void
+    convertToSingle()
+    {
+        assert(! _isSingle);
+        assert(1 == getMap().size());
+
+        const_iterator begin(getMap().begin());
+
+        SVLocusEdgeSingle transfer; // copy the edge out before the map holding it is deleted
+        transfer.isZero=false;
+        transfer.index=begin->first;
+        transfer.edge=begin->second;
+
+        delete _edges.multiPtr;
+        _edges.single = transfer;
+        _isSingle=true;
+    }
+
+
+    // given a node in the single-edge state with one edge, convert to
+    // the multi-edge state
+    void
+    convertToMulti()
+    {
+        assert(_isSingle);
+        assert(! _edges.single.isZero);
+        const SVLocusEdgeSingle transfer = _edges.single; // copy first: assigning multiPtr overwrites the union
+
+        _isSingle = false;
+        _edges.multiPtr = new SVLocusEdgesType;
+        getMap().insert(std::make_pair(transfer.index, transfer.edge));
+    }
+
+    void
+    getEdgeException(
+        const NodeIndexType toIndex,
+        const char* label) const;
+
+    //////////////////  data:
+    GenomeInterval _interval;
+    known_pos_range2 _evidenceRange;
+    CompactEdgeType _edges;
+    bool _isSingle; // selects the active member of _edges
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVLocusNode& node);
+
+
+BOOST_CLASS_IMPLEMENTATION(SVLocusNode, boost::serialization::object_serializable)
diff --git a/src/c++/lib/svgraph/SVLocusSampleCounts.cpp b/src/c++/lib/svgraph/SVLocusSampleCounts.cpp
new file mode 100644
index 0000000..37fe654
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocusSampleCounts.cpp
@@ -0,0 +1,121 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/io_util.hh"
+#include "svgraph/SVLocusSampleCounts.hh"
+
+#include <iomanip>
+#include <iostream>
+
+
+
+/// write one tab-delimited "label, count, fraction of total" line to os
+static void
+writeLine(
+    std::ostream& os,
+    const char* label,
+    const double val,
+    const double total)
+{
+    static const char sep('\t');
+
+    // an empty sample has total==0: report a zero fraction instead of nan/inf
+    const double frac((total > 0) ? (val/total) : 0.);
+
+    os << std::fixed << label << sep;
+    os << std::setprecision(0) << val << sep;
+    os << std::setprecision(4) << frac << '\n';
+}
+
+
+
+/// write the read input counts, one "label<TAB>count<TAB>fraction of total()" line per category
+void
+SampleReadInputCounts::write(std::ostream& os) const
+{
+    StreamScoper ss(os); // NOTE(review): presumably restores the stream's format state on scope exit -- confirm in blt_util/io_util.hh
+    const double dtotal(total());
+    const auto writeRow = [&](const char* label, const double val) { writeLine(os,label,val,dtotal); };
+    writeRow("MinMapqFiltered",minMapq);
+    writeRow("NotFiltered",evidenceCount.total);
+    writeRow("NotFilteredAndIgnored",evidenceCount.ignored);
+    writeRow("NotFilteredAndAnomalousPair",evidenceCount.anom);
+    writeRow("NotFilteredAndAnomalousPairRemotes",evidenceCount.remoteRecoveryCandidates);
+    writeRow("NotFilteredAndSplitRead",evidenceCount.split);
+    writeRow("NotFilteredAndLargeIndel",evidenceCount.indel);
+    writeRow("NotFilteredAndSemiAligned",evidenceCount.assm);
+}
+
+
+
+/// write one "EvidenceType_<label><TAB>count<TAB>fraction" line per evidence type, then the close pair count
+void
+SampleEvidenceCounts::
+write(std::ostream& os) const
+{
+    static const char sep('\t');
+
+    double total(0);
+    for (unsigned i(0); i<SVEvidenceType::SIZE; ++i) total += eType[i];
+
+    StreamScoper ss(os);
+    os << std::fixed << std::setprecision(4);
+    for (unsigned i(0); i<SVEvidenceType::SIZE; ++i)
+    {
+        // with no evidence at all (total==0), report a zero fraction instead of nan
+        const double frac((total > 0) ? (eType[i]/total) : 0.);
+        os << "EvidenceType_" << SVEvidenceType::label(i) << sep << eType[i] << sep << frac << '\n';
+    }
+
+    os << "ClosePairs" << sep << closeCount << '\n';
+}
+
+
+/// write a "[label]" section header and the sample source, followed by the input and evidence counts
+void
+SampleCounts::
+write(std::ostream& os, const char* label) const
+{
+    os << "\n[" << label << "]\n"
+       << "Source\t" << sampleSource << "\n";
+
+    input.write(os);
+    evidence.write(os);
+}
+
+
+
+/// write the counts of every sample, labelling sample i with sampleLabels[i]
+void
+AllCounts::
+write(std::ostream& os, const std::vector<std::string>& sampleLabels) const
+{
+    const unsigned sampleCount(size());
+    assert(sampleCount == sampleLabels.size());
+
+    for (unsigned sampleIndex(0); sampleIndex<sampleCount; ++sampleIndex)
+    {
+        getSampleCounts(sampleIndex).write(os,sampleLabels[sampleIndex].c_str());
+    }
+}
diff --git a/src/c++/lib/svgraph/SVLocusSampleCounts.hh b/src/c++/lib/svgraph/SVLocusSampleCounts.hh
new file mode 100644
index 0000000..985e965
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocusSampleCounts.hh
@@ -0,0 +1,226 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "manta/SVBreakend.hh"
+#include "manta/SVLocusEvidenceCount.hh"
+
+#include <algorithm>
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+
+/// enumerate evidence type estimated on input for each sample
+struct SampleReadInputCounts
+{
+    /// zero the mapq-filtered read count and clear evidenceCount
+    void clear()
+    {
+        minMapq = 0;
+        evidenceCount.clear();
+    }
+
+    /// total reads counted: reads filtered for mapq plus evidenceCount.total
+    double total() const
+    {
+        return minMapq + evidenceCount.total;
+    }
+
+    /// add the counts from rhs into this object
+    void merge(const SampleReadInputCounts& rhs)
+    {
+        minMapq += rhs.minMapq;
+        evidenceCount.merge(rhs.evidenceCount);
+    }
+
+    /// write one "label<TAB>count<TAB>fraction of total()" line per count category
+    void
+    write(std::ostream& os) const;
+
+    /// serialization hook: archives minMapq, then evidenceCount
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar & minMapq & evidenceCount;
+    }
+
+    // using doubles for integral counts here because (1) counts are potentially very high and (2) exact counts don't matter
+
+    /// total number of reads filtered for mapq before any classification step
+    double minMapq = 0;
+
+    /// counts for the reads which were not filtered for mapq
+    SVLocusEvidenceCount evidenceCount;
+};
+
+
+
+/// enumerate detailed evidence type counts for each sample
+struct SampleEvidenceCounts
+{
+    /// zero every evidence type count and the close pair count
+    void clear()
+    {
+        eType.assign(eType.size(), 0);
+        closeCount = 0;
+    }
+
+    /// add the counts from srs into this object
+    void merge(const SampleEvidenceCounts& srs)
+    {
+        closeCount += srs.closeCount;
+        for (unsigned typeIndex(0); typeIndex<SVEvidenceType::SIZE; ++typeIndex)
+        {
+            eType[typeIndex] += srs.eType[typeIndex];
+        }
+    }
+
+    /// write one line per evidence type (count and fraction of all evidence), then the close pair count
+    void write(std::ostream& os) const;
+
+
+    /// serialization hook: archives eType, then closeCount
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar & eType & closeCount;
+    }
+
+    /// count per evidence type, indexed by SVEvidenceType value
+    /// (std::vector rather than std::array, even though the size is known at compile-time)
+    std::vector<unsigned long> eType = std::vector<unsigned long>(SVEvidenceType::SIZE,0);
+
+    /// these are anomalous pairs which still are close to the proper pair threshold, thus downweighted
+    unsigned long closeCount = 0;
+};
+
+
+
+/// total statistics for each sample
+struct SampleCounts
+{
+    /// clear the input and evidence counts (sampleSource is left unchanged)
+    void clear()
+    {
+        input.clear();
+        evidence.clear();
+    }
+
+    /// add the counts from srs, which is expected to come from the same sampleSource
+    void merge(const SampleCounts& srs)
+    {
+        assert(sampleSource == srs.sampleSource);
+        input.merge(srs.input);
+        evidence.merge(srs.evidence);
+    }
+
+    /// write a "[label]" section header and the sample source, followed by the input and evidence counts
+    void
+    write(std::ostream& os, const char* label) const;
+
+    /// serialization hook: archives sampleSource, input and evidence, in that order
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar & sampleSource & input & evidence;
+    }
+
+    std::string sampleSource;       ///< reported on the "Source" line of write()
+    SampleReadInputCounts input;    ///< read input counts
+    SampleEvidenceCounts evidence;  ///< detailed evidence type counts
+};
+
+
+
+/// counts for all samples, stored in sample index order
+struct AllCounts
+{
+    /// clear the counts of every sample (the number of samples is unchanged)
+    void clear()
+    {
+        for (SampleCounts& sampleCounts : _samples)
+        {
+            sampleCounts.clear();
+        }
+    }
+
+    /// set the number of samples tracked by this object
+    void setSampleCount(const unsigned sampleCount)
+    {
+        _samples.resize(sampleCount);
+    }
+
+    /// number of samples tracked by this object
+    unsigned size() const
+    {
+        return _samples.size();
+    }
+
+    /// mutable access to the counts of sample 'index'; index must be less than size()
+    SampleCounts& getSampleCounts(const unsigned index)
+    {
+        assert(index < size());
+        return _samples[index];
+    }
+
+    /// read-only access to the counts of sample 'index'; index must be less than size()
+    const SampleCounts& getSampleCounts(const unsigned index) const
+    {
+        assert(index < size());
+        return _samples[index];
+    }
+
+    /// add the counts of each sample in rhs to the sample with the same index in this object;
+    /// both objects must track the same number of samples
+    void merge(const AllCounts& rhs)
+    {
+        const unsigned sampleCount(size());
+        assert(sampleCount == rhs.size());
+
+        for (unsigned sampleIndex(0); sampleIndex<sampleCount; ++sampleIndex)
+        {
+            getSampleCounts(sampleIndex).merge(rhs.getSampleCounts(sampleIndex));
+        }
+    }
+
+    /// write the counts of every sample, labelling sample i with sampleLabels[i]
+    void
+    write(
+        std::ostream& os,
+        const std::vector<std::string>& sampleLabels) const;
+
+    /// serialization hook: archives the per-sample counts
+    template<class Archive>
+    void serialize(Archive& ar, const unsigned /* version */)
+    {
+        ar & _samples;
+    }
+
+private:
+
+    std::vector<SampleCounts> _samples; ///< counts for each sample, indexed by sample
+};
diff --git a/src/c++/lib/svgraph/SVLocusSet.cpp b/src/c++/lib/svgraph/SVLocusSet.cpp
new file mode 100644
index 0000000..6a25b1f
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocusSet.cpp
@@ -0,0 +1,1486 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "blt_util/log.hh"
+#include "blt_util/SizeDistribution.hh"
+#include "common/Exceptions.hh"
+#include "svgraph/SVLocusSet.hh"
+
+#include "blt_util/thirdparty_push.h"
+
+#include "boost/archive/binary_iarchive.hpp"
+#include "boost/archive/binary_oarchive.hpp"
+#include "boost/foreach.hpp"
+
+#include "blt_util/thirdparty_pop.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+
+
+/// print a node address as "locusIndex:nodeIndex"
+std::ostream&
+operator<<(std::ostream& os, const SVLocusSet::NodeAddressType& a)
+{
+    return (os << a.first << ":" << a.second);
+}
+
+
+
+/// throw a LogicException describing a failed attempt to access locus 'index'
+void
+SVLocusSet::locusHurl(const LocusIndexType index, const char* label) const
+{
+    using namespace illumina::common;
+    std::ostringstream msg;
+    msg << "ERROR: Attempting to access locus: " << index << " in locusSet with size: " << size();
+    msg << " accessLabel: " << label << "\n";
+    BOOST_THROW_EXCEPTION(LogicException(msg.str()));
+}
+
+
+
+/// is the node set from multiple loci?
+/// \return true if any node address in 'nodes' belongs to a locus other than locusIndex
+static bool
+isMultiLocus(
+    const LocusIndexType locusIndex,
+    const std::set<SVLocusSet::NodeAddressType>& nodes)
+{
+    const auto isOtherLocus = [locusIndex](const SVLocusSet::NodeAddressType& addy)
+    {
+        return (addy.first != locusIndex);
+    };
+    return std::any_of(nodes.begin(), nodes.end(), isOtherLocus);
+}
+
+
+
+void
+SVLocusSet::
+merge(const SVLocus& inputLocus)
+{
+    // Merge a copy of inputLocus into this graph: test each node in the input locus for intersection to nodes
+    // in this graph and insert/join to existing nodes as appropriate. If the usability test fails for any
+    // input node (graph too complex in that region), the whole input locus is dropped instead (see isAbortMerge).
+
+    using namespace illumina::common;
+
+    assert(! _isFinalized);
+
+    // meaningless input indicates an error in client code:
+    assert(! inputLocus.empty());
+
+    if (inputLocus.empty()) return;
+
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::merge");
+    log_os << logtag << " inputLocus: " << inputLocus;
+    checkState(true);
+#endif
+
+    inputLocus.checkState(true);
+    // the input is first inserted as its own locus; that locus is cleared at the end if the merge aborts or the input is moved
+    const LocusIndexType startLocusIndex(insertLocus(inputLocus));
+    const SVLocus& startLocus(_loci[startLocusIndex]);
+    LocusIndexType headLocusIndex(startLocusIndex);
+
+    // indicates if the input locus has been 'moved' into another locus in the graph:
+    bool isInputLocusMoved(false);
+
+    // indicates that the locus will not be inserted into the graph.
+    // if true, skip merge and clear out the startLocus
+    bool isAbortMerge(false);
+
+    // because we have a non-general interval overlap test, we must order search
+    // nodes by begin_pos on each chromosome
+    // (the ordering used here is the std::map iteration order over the GenomeInterval keys)
+    typedef std::map<GenomeInterval,NodeIndexType> nodeMap_t;
+    nodeMap_t nodeMap;
+    {
+        const NodeIndexType nodeCount(startLocus.size());
+        for (NodeIndexType nodeIndex(0); nodeIndex<nodeCount; ++nodeIndex)
+        {
+            nodeMap.insert(std::make_pair(startLocus.getNode(nodeIndex).getInterval(),nodeIndex));
+        }
+    }
+
+    // reuse this intersectNodes object throughout the merge:
+    std::set<NodeAddressType> intersectNodes;
+
+    // test if the graph has grown too complex in these regions. If so, abort the insertion of this locus:
+    for (const nodeMap_t::value_type& nodeVal : nodeMap)
+    {
+        static const bool isTestUsability(true);
+
+        // get a standard intersection of the input node:
+        const bool isUsable(getNodeIntersect(startLocusIndex, nodeVal.second, intersectNodes, isTestUsability));
+
+        if (! isUsable)
+        {
+#ifdef DEBUG_SVL
+            log_os << logtag << "Aborting merge\n";
+#endif
+            isAbortMerge=true;
+            break;
+        }
+    }
+
+    for (const nodeMap_t::value_type& nodeVal : nodeMap)
+    {
+        if (isAbortMerge) break;
+
+        const NodeIndexType nodeIndex(nodeVal.second);
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " inputNode: " << NodeAddressType(std::make_pair(startLocusIndex,nodeIndex)) << " " << startLocus.getNode(nodeIndex);
+#endif
+
+        getNodeMergeableIntersect(startLocusIndex, nodeIndex, isInputLocusMoved, intersectNodes);
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " intersect_size: " << intersectNodes.size() << "\n";
+        for (const NodeAddressType& val : intersectNodes)
+        {
+            log_os << logtag << " intersect address: " << val << " node: " <<  getNode(val) << "\n";
+        }
+#endif
+
+        if (isInputLocusMoved)
+        {
+            if (2>intersectNodes.size()) continue; // NOTE(review): presumably the moved input node is itself in the set, so fewer than 2 hits means nothing to merge -- confirm
+        }
+        else
+        {
+            if (intersectNodes.empty()) continue;
+        }
+
+        while ( isMultiLocus(headLocusIndex, intersectNodes) )
+        {
+            // if there are any intersections, copy the loci of all intersecting nodes into
+            // a single locus, by convention we use the lowest locusIndex of the intersecting set
+            moveIntersectToLowIndex(intersectNodes,startLocusIndex,headLocusIndex);
+            if (! isInputLocusMoved) isInputLocusMoved=(headLocusIndex != startLocusIndex);
+
+            getNodeMergeableIntersect(startLocusIndex, nodeIndex, isInputLocusMoved, intersectNodes);
+            assert(! intersectNodes.empty());
+
+#ifdef DEBUG_SVL
+            log_os << logtag << " multilocus detected, nodes moved and re-intersected. intersect_size: " << intersectNodes.size() << "\n";
+#endif
+        }
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " intersect2_size: " << intersectNodes.size() << "\n";
+        for (const NodeAddressType& val : intersectNodes)
+        {
+            log_os << logtag << " intersect2 address: " << val << " node: " <<  getNode(val) << "\n";
+        }
+#endif
+
+        // merge overlapping nodes in order from highest nodeid to lowest, so that the
+        // merge process does not invalidate nodeids of higher value
+        //
+        // we first need to find a node corresponding to the input node (but possibly merged to a larger region already):
+        //
+        NodeAddressType inputSuperAddy;
+        {
+            bool isInputSuperFound(false);
+            const known_pos_range2& inputRange(getLocus(startLocusIndex).getNode(nodeIndex).getInterval().range);
+
+            for (const NodeAddressType& val : intersectNodes)
+            {
+                assert(val.first == headLocusIndex);
+
+                // one node must be a superset of the input node, find this and store separately:
+                if (getNode(val).getInterval().range.is_superset_of(inputRange))
+                {
+                    inputSuperAddy=val;
+                    isInputSuperFound=true;
+                    break;
+                }
+            }
+            assert(isInputSuperFound);
+        }
+
+        // merge this inputNode with each intersecting Node,
+        // and eliminate the intersecting node:
+        //
+        NodeAddressType mergeTargetAddy(inputSuperAddy);
+        BOOST_REVERSE_FOREACH(NodeAddressType nodeAddy, intersectNodes)
+        {
+            if (nodeAddy == inputSuperAddy) continue;
+            if (nodeAddy < mergeTargetAddy) std::swap(nodeAddy,mergeTargetAddy); // keep the lower address as the merge target, the higher one is removed
+#ifdef DEBUG_SVL
+            log_os << logtag << " MergeAndRemove: " << nodeAddy << "\n";
+#endif
+            mergeNodePtr(nodeAddy,mergeTargetAddy);
+            removeNode(nodeAddy);
+#ifdef DEBUG_SVL
+            log_os << logtag << " Finished: " << nodeAddy << "\n";
+            checkState();
+#endif
+        }
+    }
+
+    if (isAbortMerge || isInputLocusMoved)
+    {
+        clearLocus(startLocusIndex);
+    }
+
+#ifdef DEBUG_SVL
+    checkState(true,true);
+#endif
+}
+
+
+
+/// merge every non-empty locus of inputSet into this graph, then fold in inputSet's summary statistics
+void
+SVLocusSet::
+merge(const SVLocusSet& inputSet)
+{
+    // TODO: check for compatible bam headers between inputSet and this
+    assert(getMinMergeEdgeCount() == inputSet.getMinMergeEdgeCount());
+
+    for (const SVLocus& inputLocus : inputSet._loci)
+    {
+        if (inputLocus.empty()) continue;
+
+        try
+        {
+            merge(inputLocus);
+        }
+        catch (...)
+        {
+            // log which locus failed, then let the original exception continue to propagate:
+            log_os << "ERROR: SVLocusSet merge failed.\n";
+            log_os << "\tSVLocusSet source: " << inputSet.getSource() << "\n";
+            log_os << "\tSVLocus index: " << inputLocus.getIndex() << "\n";
+            throw;
+        }
+    }
+
+    _totalCleaned += inputSet._totalCleaned;
+    _counts.merge(inputSet._counts);
+    _highestSearchCount = std::max(_highestSearchCount, inputSet._highestSearchCount);
+    _isMaxSearchCount = (_isMaxSearchCount || inputSet._isMaxSearchCount);
+    _highestSearchDensity = std::max(_highestSearchDensity, inputSet._highestSearchDensity);
+    _isMaxSearchDensity = (_isMaxSearchDensity || inputSet._isMaxSearchDensity);
+    _buildTime.merge(inputSet._buildTime);
+    _mergeTime.merge(inputSet._mergeTime); // this one is more of a formality...
+}
+
+
+
+bool
+SVLocusSet::
+getNodeIntersectCore(
+    const LocusIndexType inputLocusIndex,
+    const NodeIndexType inputNodeIndex,
+    const LocusSetIndexerType& searchNodes,
+    const LocusIndexType filterLocusIndex,
+    std::set<NodeAddressType>& intersectNodes,
+    const bool isTestUsability) const
+{
+    typedef LocusSetIndexerType::const_iterator in_citer;
+    // fill intersectNodes with the searchNodes entries (skipping locus filterLocusIndex) which intersect the input node; returns false only when isTestUsability is set and a search count/density limit is exceeded
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::getNodeIntersectCore");
+    log_os << logtag << " inputNode: " << inputLocusIndex << ":" << inputNodeIndex << " " << getNode(std::make_pair(inputLocusIndex,inputNodeIndex));
+    checkState();
+#endif
+
+    assert(_isIndexed);
+
+    intersectNodes.clear();
+
+    // get all nodes \in searchNodes which intersect with the input node:
+    const NodeAddressType inputAddy(std::make_pair(inputLocusIndex,inputNodeIndex));
+    const in_citer it(searchNodes.data().lower_bound(inputAddy));
+    const GenomeInterval& inputInterval(getNode(inputAddy).getInterval());
+    const pos_t maxRegionSize(_maxRegionSize[inputInterval.tid]);
+
+    const in_citer it_begin(searchNodes.data().begin()), it_end(searchNodes.data().end());
+
+    // diagnostics to determine if graph is growing too dense in one region:
+    bool isUsable(true);
+    unsigned searchCount(0);
+
+    // first look forward and extend to find all nodes which this inputNode intersects:
+    for (in_citer it_fwd(it); it_fwd != it_end; ++it_fwd)
+    {
+        if (isTestUsability)
+        {
+            searchCount++;
+            if (searchCount > _opt.maxSearchCount)
+            {
+                isUsable = false;
+                _isMaxSearchCount=true;
+                break;
+            }
+        }
+
+        if (it_fwd->first == filterLocusIndex) continue;
+#ifdef DEBUG_SVL
+        log_os << logtag << "\tFWD test: " << (*it_fwd) << " " << getNode(*it_fwd);
+#endif
+        if (! inputInterval.isIntersect(getNode(*it_fwd).getInterval())) break;
+        intersectNodes.insert(*it_fwd);
+#ifdef DEBUG_SVL
+        log_os << logtag << "\tFWD insert: " << (*it_fwd) << "\n";
+#endif
+    }
+
+    // now find all intersecting nodes in reverse direction:
+    for (in_citer it_rev(it); it_rev != it_begin; )
+    {
+        --it_rev;
+
+        if (isTestUsability)
+        {
+            if (! isUsable) break; // the forward scan already hit the search count limit
+            searchCount++;
+            if (searchCount > _opt.maxSearchCount)
+            {
+                isUsable = false;
+                _isMaxSearchCount=true;
+                break;
+            }
+        }
+
+        if (it_rev->first == filterLocusIndex) continue;
+#ifdef DEBUG_SVL
+        log_os << logtag << "\tREV test: " << (*it_rev) << " " << getNode(*it_rev);
+#endif
+        const GenomeInterval& searchInterval(getNode(*it_rev).getInterval());
+        if (! inputInterval.isIntersect(searchInterval))
+        {
+            if (! isOverlapAllowed()) break;
+            // with overlap allowed, keep scanning back until the chromosome changes or a node begins more than maxRegionSize before the input:
+            if (inputInterval.tid != searchInterval.tid) break;
+            if ((searchInterval.range.begin_pos()+maxRegionSize)<inputInterval.range.begin_pos()) break;
+            continue;
+        }
+
+        intersectNodes.insert(*it_rev);
+#ifdef DEBUG_SVL
+        log_os << logtag << "\tREV insert: " << (*it_rev) << "\n";
+#endif
+    }
+
+    if (! isTestUsability) return true; // the search diagnostics below are only tracked for usability tests
+
+    _highestSearchCount = std::max(_highestSearchCount, searchCount);
+
+    pos_t searchSize(inputInterval.range.end_pos() - std::max(0, inputInterval.range.begin_pos()-maxRegionSize));
+
+    assert(searchSize>=0);
+    if (0 != searchSize)
+    {
+        static const pos_t minSearchSize(40);
+        searchSize = std::max(searchSize, minSearchSize);
+        const float searchDensity(static_cast<float>(searchCount)/static_cast<float>(searchSize));
+        _highestSearchDensity = std::max(_highestSearchDensity, searchDensity);
+
+        if (searchDensity > _opt.maxSearchDensity)
+        {
+            isUsable = false;
+            _isMaxSearchDensity=true;
+        }
+    }
+
+    return isUsable;
+}
+
+
+
+/// fill 'edges' with a (local node address, remote node index) pair for every node in
+/// remoteIntersectNodes which intersects the input remote node and is not part of the input locus
+void
+SVLocusSet::
+getIntersectingEdgeNodes(
+    const LocusIndexType inputLocusIndex,
+    const NodeIndexType inputRemoteNodeIndex,
+    const EdgeMapType& remoteIntersectNodeToLocalNodeMap,
+    const LocusSetIndexerType& remoteIntersectNodes,
+    std::vector<EdgeInfoType>& edges) const
+{
+    edges.clear();
+
+    // find all nodes, from the remoteIntersectNodes set, which intersect this function's input node:
+    //
+    // for this application, inputLocus is an input set isolated from the rest of the graph, so nodes
+    // intersected in the inputLocus are filtered out
+    //
+    std::set<NodeAddressType> remoteHits;
+    getNodeIntersectCore(inputLocusIndex,inputRemoteNodeIndex,remoteIntersectNodes,inputLocusIndex,remoteHits);
+
+    for (const NodeAddressType& remoteAddy : remoteHits)
+    {
+        // find what local nodes the remote nodes trace back to:
+        const auto localRange(remoteIntersectNodeToLocalNodeMap.equal_range(remoteAddy));
+        // sanity check on the lookup result:
+        assert(localRange.first != remoteIntersectNodeToLocalNodeMap.end());
+        for (auto localIter(localRange.first); localIter != localRange.second; ++localIter)
+        {
+            const NodeAddressType localAddy(std::make_pair(remoteAddy.first,localIter->second));
+            edges.push_back(std::make_pair(localAddy,remoteAddy.second));
+        }
+    }
+}
+
+
+
+/// add every non-noise node which intersects findSignalAddy (ignoring locus inputLocusIndex) to signalIntersectNodes;
+/// isIntersectRemotes is set (never reset) when an intersecting noise node is found in inputIntersectRemotes
+void
+SVLocusSet::
+findSignalNodes(
+    const LocusIndexType inputLocusIndex,
+    const NodeAddressType findSignalAddy,
+    std::set<NodeAddressType>& signalIntersectNodes,
+    const std::set<NodeAddressType>& inputIntersectRemotes,
+    bool& isIntersectRemotes) const
+{
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::findSignalNodes");
+    log_os << logtag << " findSignalAddy: " << findSignalAddy << "\n";
+#endif
+
+    // get a standard intersection of the input node:
+    std::set<NodeAddressType> candidateNodes;
+    getNodeIntersectCore(findSignalAddy.first, findSignalAddy.second, _inodes, inputLocusIndex, candidateNodes);
+    for (const NodeAddressType& candidateAddy : candidateNodes)
+    {
+#ifdef DEBUG_SVL
+        log_os << logtag << " intersectAddy: " << candidateAddy << "\n";
+#endif
+        if (isNoiseNode(candidateAddy))
+        {
+            // check for the rare remote intersect condition:
+            if ((! isIntersectRemotes) && (inputIntersectRemotes.count(candidateAddy) != 0))
+            {
+                isIntersectRemotes=true;
+            }
+            continue;
+        }
+
+#ifdef DEBUG_SVL
+        if (signalIntersectNodes.count(candidateAddy) == 0)
+        {
+            log_os << logtag << " merge/new: " << findSignalAddy << " " << candidateAddy << "\n";
+        }
+#endif
+
+        signalIntersectNodes.insert(candidateAddy);
+    }
+}
+
+
+
+void
+SVLocusSet::
+getNodeMergeableIntersect(
+    const LocusIndexType inputLocusIndex,
+    const NodeIndexType inputNodeIndex,
+    const bool isInputLocusMoved,
+    std::set<NodeAddressType>& mergeIntersectNodes) const
+{
+    //
+    // TODO: There's room for significant optimization of these methods. The improvements are not trivial,
+    //   but they would allow us to filter fewer nodes from being merged when node intersection counts become large.
+    //
+
+    //
+    // There are two ways sets of mergeable nodes can occur:
+    //
+    // (1) There is a set of nodes which overlap with both input node and one
+    // of the remote nodes that the input points to (ie they have a shared edge).
+    // When totaled together, the edge count of this set + the inputNode edge
+    // exceeds minMergeEdgeCount.
+    //
+    // (2) The input node either contains an edge which is greater than minMergeEdgeCount
+    // or will contain such an edge due to (1), in this case the input node can be merged
+    // with a locally overlapping node which also contains an edge which is greater than
+    // minMergeEdgeCount. Note that in case (2) remote node intersection is not required.
+    //
+
+    const NodeAddressType inputAddy(std::make_pair(inputLocusIndex,inputNodeIndex));
+    const SVLocusNode& inputNode(getNode(inputAddy));
+
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::getNodeMergableIntersect");
+    log_os << logtag << " inputNode: " << inputAddy << " " << inputNode;
+    checkState();
+#endif
+
+    // reuse this intersectNodes as a temporary throughout the methods below
+    std::set<NodeAddressType> intersectNodes;
+
+    //
+    // build a new index, which contains, for all nodes x which intersect the input, an
+    // enumeration of the remote nodes Y connected by edges to node x (remoteIntersectNodes)
+    // and a map for each node y \in Y pointing back to node x (remoteIntersectNodeToLocalNodeMap)
+    //
+    LocusSetIndexerType remoteIntersectNodes(*this);
+    EdgeMapType remoteIntersectNodeToLocalNodeMap;
+
+    // nodes which intersect the input and have already been certified as signal:
+    std::set<NodeAddressType> signalIntersectNodes;
+    {
+        // get a standard intersection of the input node:
+        getNodeIntersect(inputLocusIndex, inputNodeIndex, intersectNodes);
+
+        //
+        // 1. build the new remoteIntersectNodes/remoteIntersectNodeToLocalNodeMap index
+        //
+        for (const NodeAddressType& intersectAddy : intersectNodes)
+        {
+            const SVLocusNode& intersectNode(getNode(intersectAddy));
+
+            // get the remotes of each node which intersect with the query node,
+            // place these in remoteIntersectNodes
+            const SVLocusEdgeManager edgeMap(intersectNode.getEdgeManager());
+            for (const SVLocusEdgesType::value_type& intersectEdge : edgeMap.getMap())
+            {
+                // build remote <-> local indexing structures:
+                NodeAddressType remoteAddy(std::make_pair(intersectAddy.first,intersectEdge.first));
+                remoteIntersectNodes.data().insert(remoteAddy);
+                remoteIntersectNodeToLocalNodeMap.insert(std::make_pair(remoteAddy,intersectAddy.second));
+            }
+        }
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " remoteIntersectNodes.size(): " << remoteIntersectNodes.data().size() << "\n";
+        for (const NodeAddressType& addy : remoteIntersectNodes.data())
+        {
+            log_os << logtag << "\tremoteIntersectNode: " << addy << " " << getNode(addy);
+        }
+#endif
+
+        //
+        // 2. get the signal node set:
+        //
+        // Note that the signal node search is not transitive b/c we have required all signal nodes
+        // in the graph to have merged already.
+        //
+        for (const NodeAddressType& intersectAddy : intersectNodes)
+        {
+            if (! isNoiseNode(intersectAddy))
+            {
+                signalIntersectNodes.insert(intersectAddy);
+            }
+        }
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " signalIntersect.size(): " << signalIntersectNodes.size() << "\n";
+        for (const NodeAddressType& addy : signalIntersectNodes)
+        {
+            log_os << logtag << "\tsignalIntersectNode: " << addy << " " << getNode(addy);
+        }
+#endif
+    }
+
+    //
+    // begin building the primary function output, mergeIntersectNodes, by enumerating all edges of the input node
+    //
+    mergeIntersectNodes.clear();
+
+    // loop through each edge connected to the input node
+    const SVLocusEdgeManager edgeMap(inputNode.getEdgeManager());
+    for (const SVLocusEdgesType::value_type& inputEdge : edgeMap.getMap())
+    {
+#ifdef DEBUG_SVL
+        log_os << logtag << " processing edge: " << inputAddy << "->" << inputLocusIndex << ":" << inputEdge.first << "\n";
+        checkState();
+#endif
+
+        //
+        // for each edge from the input node, get all intersecting edges
+        //
+        // 'intersecting edge' means that the nodes connected by the two edges each overlap
+        //
+        std::vector<EdgeInfoType> inputIntersectEdges;
+        getIntersectingEdgeNodes(inputLocusIndex, inputEdge.first, remoteIntersectNodeToLocalNodeMap, remoteIntersectNodes, inputIntersectEdges);
+
+        unsigned intersectCount(inputIntersectEdges.size());
+        if (! isInputLocusMoved)
+        {
+            /// TODO: doc this adjustment, does this normalize the edge count to always include self-intersect?
+            intersectCount++;
+        }
+
+        // isRegionCheck initiates a more detailed evidence signal threshold check process
+        //
+        // - The default process checks the total evidence summed over the entire
+        // Node intersect set. This neglects to account for the possibility that that evidence
+        // density could be low, and yet a high evidence sum could be achieved by transitive over
+        // lap of many nodes.
+        //
+        // - The regioncheck pathway sums up evidence at each genomic region. It more accurately
+        // reflects peak evidence but is somewhat slower to compute.
+        //
+        // Example:
+        //
+        // Assume each node below has an evidence count of 1.
+        //
+        // |---node1-----|
+        //           |-----node2-----|
+        //                        |-----node3---|
+        //
+        // Default evidence count:
+        // 33333333333333333333333333333333333333
+        //
+        // isRegionCheck evidence count:
+        // 11111111112222211111111222211111111111
+        //
+        //
+
+        // peak RegionCheck count will always equal default count when 2 or fewer nodes exist,
+        // so there's no reason to turn it on until we have more nodes
+        const bool isRegionCheck(intersectCount>2);
+
+        if (isRegionCheck)
+        {
+            _mergeRegions.clear();
+        }
+
+        // enumerate counts as part of the (non-RegionCheck) process to determine if the intersection set
+        // contains sufficient evidence to initiate a merge
+        unsigned mergedLocalEdgeCount(0);
+        unsigned mergedRemoteEdgeCount(0);
+
+        ///
+        /// enumerate node evidence using either the default or RegionCheck process:
+        ///
+        auto addEdgeEvidenceCount = [&](
+                                        const SVLocus& edgeLocus,
+                                        const NodeIndexType localNodeIndex,
+                                        const NodeIndexType remoteNodeIndex)
+        {
+            // total edge counts on the remote->local edge:
+            const unsigned remoteEdgeCount = edgeLocus.getEdge(remoteNodeIndex,localNodeIndex).getCount();
+
+            // total edge counts on the local->remote edge:
+            const unsigned localEdgeCount = edgeLocus.getEdge(localNodeIndex,remoteNodeIndex).getCount();
+
+            if (isRegionCheck)
+            {
+                const known_pos_range2& localRange(edgeLocus.getNode(localNodeIndex).getInterval().range);
+                const known_pos_range2& remoteRange(edgeLocus.getNode(remoteNodeIndex).getInterval().range);
+
+                _mergeRegions.localNodeOutbound.add(localRange,localEdgeCount);
+                _mergeRegions.localNodeInbound.add(localRange,remoteEdgeCount);
+                _mergeRegions.remoteNodeOutbound.add(remoteRange,remoteEdgeCount);
+                _mergeRegions.remoteNodeInbound.add(remoteRange,localEdgeCount);
+            }
+            else
+            {
+                mergedLocalEdgeCount += localEdgeCount;
+                mergedRemoteEdgeCount += remoteEdgeCount;
+            }
+        };
+
+        for (const EdgeInfoType& edgeInfo : inputIntersectEdges)
+        {
+            addEdgeEvidenceCount(getLocus(edgeInfo.first.first),edgeInfo.first.second,edgeInfo.second);
+        }
+
+        // if the input hasn't been moved into the primary locus graph yet, then we need to include the inputLocus
+        // in order to get an accurate edge intersection count:
+        if (! isInputLocusMoved)
+        {
+            addEdgeEvidenceCount(getLocus(inputAddy.first),inputNodeIndex,inputEdge.first);
+        }
+
+        if (isRegionCheck)
+        {
+            mergedLocalEdgeCount=(std::min(_mergeRegions.localNodeOutbound.maxVal(),_mergeRegions.remoteNodeInbound.maxVal()));
+            mergedRemoteEdgeCount=(std::min(_mergeRegions.localNodeInbound.maxVal(),_mergeRegions.remoteNodeOutbound.maxVal()));
+        }
+
+#ifdef DEBUG_SVL
+        log_os << logtag << " isRegionCheck: " << isRegionCheck << "\n";
+        log_os << logtag << " final merge counts"
+               << " local: " << mergedLocalEdgeCount
+               << " remote: " << mergedRemoteEdgeCount
+               << "\n";
+        checkState();
+#endif
+
+        if ((mergedLocalEdgeCount < getMinMergeEdgeCount()) &&
+            (mergedRemoteEdgeCount < getMinMergeEdgeCount())) continue;
+
+        //
+        // Add type1 mergeable nodes:
+        //
+        for (const EdgeInfoType& edgeInfo : inputIntersectEdges)
+        {
+            mergeIntersectNodes.insert(edgeInfo.first);
+        }
+
+        /// for each type1 node, add any new intersections to the signal node set:
+        ///
+        /// this is not very efficient for now -- each type1 edge added in potentially
+        /// expands the current node to intersect new signal nodes
+        /// -- this loop looks for those new signal nodes
+        ///
+
+        {
+            // this is used to search for the (rare) case where the intersection set
+            // locals overlap with the intersection set remotes
+            std::set<NodeAddressType> inputIntersectRemotes;
+            for (const EdgeInfoType& edgeInfo : inputIntersectEdges)
+            {
+                inputIntersectRemotes.insert(std::make_pair(edgeInfo.first.first,edgeInfo.second));
+            }
+
+            bool isIntersectRemotes(false);
+
+            // check both the original node and intersected nodes for intersection to
+            // any of the group's remotes, and for new type2 signal intersect:
+            findSignalNodes(inputLocusIndex, inputAddy, signalIntersectNodes, inputIntersectRemotes, isIntersectRemotes);
+            for (const EdgeInfoType& edgeInfo : inputIntersectEdges)
+            {
+                findSignalNodes(inputLocusIndex, edgeInfo.first, signalIntersectNodes, inputIntersectRemotes, isIntersectRemotes);
+            }
+
+            if (isIntersectRemotes)
+            {
+                for (const NodeAddressType& intersectAddy : inputIntersectRemotes)
+                {
+#ifdef DEBUG_SVL
+                    log_os << logtag << " adding ownRemote: " << intersectAddy << "\n";
+#endif
+                    mergeIntersectNodes.insert(intersectAddy);
+
+                    // check to see if this adds even more signal nodes!
+                    findSignalNodes(inputLocusIndex, intersectAddy, signalIntersectNodes, inputIntersectRemotes, isIntersectRemotes);
+                }
+            }
+        }
+        //
+        // Add type2 mergeable nodes:
+        //
+        for (const NodeAddressType& signalAddy : signalIntersectNodes)
+        {
+            mergeIntersectNodes.insert(signalAddy);
+        }
+    }
+
+#ifdef DEBUG_SVL
+    log_os << logtag << " END. IntersectNodeSize: " << mergeIntersectNodes.size() << " Nodes:\n";
+    for (const NodeAddressType addy : mergeIntersectNodes)
+    {
+        log_os << logtag << "\tInode: " << addy << "\n";
+    }
+#endif
+}
+
+
+
+void
+SVLocusSet::
+getRegionIntersect(
+    const GenomeInterval interval,
+    std::set<NodeAddressType>& intersectNodes)
+{
+    // stage the query interval as the only node of a temporary scratch locus:
+    const LocusIndexType scratchLocusIndex = insertLocus(SVLocus());
+    const NodeIndexType queryNodeIndex = getLocus(scratchLocusIndex).addNode(interval, this);
+    getNodeIntersect(scratchLocusIndex, queryNodeIndex, intersectNodes);
+    // the scratch locus is only needed for the query above, so discard it again:
+    clearLocus(scratchLocusIndex);
+}
+
+
+
+void
+SVLocusSet::
+moveIntersectToLowIndex(
+    const std::set<NodeAddressType>& intersectNodes,
+    const LocusIndexType startLocusIndex,
+    LocusIndexType& locusIndex)
+{
+    // remember the head locus on entry; locusIndex itself is overwritten below:
+    const unsigned startHeadLocusIndex = locusIndex;
+
+    // the entry head locus is only cleared after copying if it is not startLocusIndex:
+    const bool isClearSource = (startHeadLocusIndex != startLocusIndex);
+
+    // the set is ordered by (locus index, node index), so its first entry already
+    // holds the lowest locus index; an empty set leaves locusIndex unchanged:
+    if (! intersectNodes.empty())
+    {
+        locusIndex = intersectNodes.begin()->first;
+    }
+
+    // fold the entry head locus, then every intersecting locus, into the target:
+    combineLoci(startHeadLocusIndex, locusIndex, isClearSource);
+    for (const NodeAddressType& intersectAddy : intersectNodes)
+    {
+        combineLoci(intersectAddy.first, locusIndex);
+    }
+
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::moveIntersectToLowIndex");
+    log_os << logtag << " Reassigned all intersecting nodes to locusIndex: " << locusIndex << " startHeadLocusIndex: " << startHeadLocusIndex << " startLocusIndex:" << startLocusIndex << "\n";
+    checkState();
+#endif
+}
+
+
+
+void
+SVLocusSet::
+combineLoci(
+    const LocusIndexType fromIndex,
+    const LocusIndexType toIndex,
+    const bool isClearSource)
+{
+    assert(toIndex<_loci.size());
+
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::combineLoci");
+    log_os << logtag << " from: " << fromIndex << " toIndex: " << toIndex << " isClear:" << isClearSource << "\n";
+#endif
+
+    // nothing to do when source and target coincide or the source index is out of range:
+    if ((fromIndex == toIndex) || (fromIndex >= _loci.size())) return;
+    // an empty source locus has nothing to contribute either:
+    SVLocus& sourceLocus(_loci[fromIndex]);
+    if (sourceLocus.empty()) return;
+
+    // copy the source contents into the target, then clear the source if requested:
+    _loci[toIndex].copyLocus(sourceLocus, this);
+    if (isClearSource) clearLocus(fromIndex);
+}
+
+
+
+LocusIndexType
+SVLocusSet::
+insertLocus(
+    const SVLocus& inputLocus)
+{
+    assert(_isIndexed);
+
+    // reuse the first free slot recorded in _emptyLoci when there is one, otherwise grow _loci by one:
+    LocusIndexType locusIndex(0);
+    if (! _emptyLoci.empty())
+    {
+        locusIndex = *(_emptyLoci.begin());
+        assert(_loci[locusIndex].empty());
+        _emptyLoci.erase(locusIndex);
+    }
+    else
+    {
+        static const unsigned maxIndex(std::numeric_limits<LocusIndexType>::max());
+        locusIndex = _loci.size();
+        assert(locusIndex<maxIndex);
+        _loci.resize(locusIndex+1);
+    }
+    SVLocus& targetLocus(_loci[locusIndex]);
+    targetLocus.updateIndex(locusIndex);
+    targetLocus.copyLocus(inputLocus, this);
+    return locusIndex;
+}
+
+
+
+void
+SVLocusSet::
+mergeNodePtr(NodeAddressType fromPtr,
+             NodeAddressType toPtr)
+{
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::mergeNodePtr");
+    log_os << logtag << " from: " << fromPtr << " to: " << toPtr << " fromLocusSize: " << getLocus(fromPtr.first).size() << "\n";
+#endif
+    assert(_isIndexed);
+    // merge node fromPtr into node toPtr; toPtr must be indexed and share fromPtr's locus. The lookup
+    // lives inside assert() so that no unused local remains when NDEBUG compiles the asserts away:
+    assert(_inodes.data().find(toPtr) != _inodes.data().end());
+    assert(fromPtr.first == toPtr.first);
+    getLocus(fromPtr.first).mergeNode(fromPtr.second, toPtr.second, this);
+}
+
+
+
+void
+SVLocusSet::
+clean()
+{
+    for (SVLocus& locus : _loci)
+    {
+        if (locus.empty()) continue; // nothing to clean in an empty locus
+        _totalCleaned += locus.clean(getMinMergeEdgeCount(), this);
+        // register the locus as free if this cleaning pass left it empty:
+        if (! locus.empty()) continue;
+        _emptyLoci.insert(locus.getIndex());
+    }
+#ifdef DEBUG_SVL
+    checkForOverlapNodes(true);
+#endif
+}
+
+
+
+void
+SVLocusSet::
+cleanRegion(const GenomeInterval interval)
+{
+#ifdef DEBUG_SVL
+    static const std::string logtag("SVLocusSet::cleanRegion");
+    log_os << logtag << " interval: " << interval << "\n";
+#endif
+
+    std::set<NodeAddressType> intersectNodes;
+    getRegionIntersect(interval, intersectNodes);
+
+    // walk the intersecting nodes from last to first so that a locus holding
+    // several of them is handled correctly: an address still to be visited is
+    // then never one which has been shifted by an earlier node deletion:
+    for (auto addyIter(intersectNodes.rbegin()); addyIter != intersectNodes.rend(); ++addyIter)
+    {
+        const NodeAddressType& addy(*addyIter);
+        SVLocus& locus(getLocus(addy.first));
+        if (locus.empty()) continue;
+        _totalCleaned += locus.cleanNode(getMinMergeEdgeCount(), addy.second, this);
+        if (locus.empty()) _emptyLoci.insert(locus.getIndex());
+#ifdef DEBUG_SVL
+        log_os << logtag << " intersect: " << addy << " is_empty_after_clean: " << locus.empty() << "\n";
+#endif
+    }
+#ifdef DEBUG_SVL
+    checkForOverlapNodes(true);
+#endif
+}
+
+
+
+void
+SVLocusSet::
+dump(std::ostream& os) const
+{
+    // debug output: write every locus in storage order (empty loci are not skipped),
+    // bracketed by a start and an end marker line
+    os << "LOCUSSET_START\n";
+    for (const_iterator locusIter(begin()); locusIter != end(); ++locusIter)
+        os << *locusIter;
+    os << "LOCUSSET_END\n";
+}
+
+
+
+void
+SVLocusSet::
+dumpRegion(std::ostream& os,
+           const GenomeInterval interval)
+{
+    // collect the addresses of all nodes intersecting the query interval:
+    std::set<NodeAddressType> intersectNodes;
+    getRegionIntersect(interval,intersectNodes);
+
+    // re-sort the hits with the node index comparator (by interval, then by address):
+    LocusSetIndexerType sortedNodes(*this);
+    sortedNodes.data().insert(intersectNodes.begin(), intersectNodes.end());
+
+    // print each hit's address followed by the node itself:
+    for (const NodeAddressType& addy : sortedNodes.data())
+    {
+        os << "SVNode LocusIndex:NodeIndex : " << addy << "\n";
+        os << getNode(addy);
+    }
+}
+
+
+
+
+void
+SVLocusSet::
+dumpStats(
+    std::ostream& os) const
+{
+    static const char sep('\t');
+
+    // build/merge timings first, then one "label<TAB>value" line per whole-graph statistic:
+    os << "GraphBuildTime" << sep;
+    _buildTime.reportHr(os);
+    os << "\n" << "GraphMergeTime" << sep;
+    _mergeTime.reportHr(os);
+    os << "\n";
+    os << "disjointSubgraphs" << sep << nonEmptySize() << "\n";
+    os << "nodes" << sep << totalNodeCount() << "\n";
+    os << "directedEdges" << sep << totalEdgeCount() << "\n";
+    os << "selfEdges" << sep << selfEdgeCount() << "\n";
+    os << "totalGraphEvidence" << sep << totalObservationCount() << "\n";
+    os << "totalCleaned" << sep << _totalCleaned << "\n";
+    os << "highestSearchCount" << sep << _highestSearchCount << "\n";
+    os << "isMaxSearchCount" << sep << _isMaxSearchCount << "\n";
+    os << "highestSearchDensity" << sep << _highestSearchDensity << "\n";
+    os << "isMaxSearchDensity" << sep << _isMaxSearchDensity << "\n";
+
+    /// TODO: Add real sample labels:
+    {
+        const unsigned sampleCount(_counts.size());
+        std::vector<std::string> sampleLabels;
+        for (unsigned sampleIndex(0); sampleIndex<sampleCount; ++sampleIndex)
+        {
+            std::ostringstream labelStream;
+            labelStream << "Sample" << sampleIndex;
+            sampleLabels.push_back(labelStream.str());
+        }
+        _counts.write(os,sampleLabels);
+    }
+    os << "\n";
+
+    // node region size quantiles
+    {
+        // pool the interval size of every node in the graph into a single distribution:
+        SizeDistribution nodeSize;
+        for (const SVLocus& locus : *this)
+        {
+            for (const SVLocusNode& node : locus)
+            {
+                const unsigned nodeRegionSize(node.getInterval().range.size());
+                nodeSize.addObservation(nodeRegionSize);
+            }
+        }
+
+        // report the pooled distribution at a fixed list of quantile levels:
+        static const float quantLevel[] = { 0.25f, 0.5f, 0.75f, 0.9f, 0.95f, 0.99f };
+        os << "NodeRegionSizequantile:\n";
+        for (const float level : quantLevel)
+        {
+            os << level << sep << nodeSize.quantile(level) << "\n";
+        }
+    }
+
+    {
+        // node edge count distro: 0,1,2,3... X+
+        static const unsigned maxEdgeCount(10);
+        std::vector<unsigned> edgeCount(maxEdgeCount);
+        getNodeEdgeCountDistro(edgeCount);
+        os << "NodeEdgeCount:\n";
+        for (unsigned binIndex(0); binIndex<maxEdgeCount; ++binIndex)
+        {
+            // the label of the final bin is printed with a trailing '+':
+            const bool isLastBin((binIndex+1) == maxEdgeCount);
+            os << binIndex << (isLastBin ? "+" : "") << sep << edgeCount[binIndex] << "\n";
+        }
+    }
+
+    {
+        // node obs distro: 0,1,2,3... X+
+        static const unsigned maxObsCount(30);
+        std::vector<unsigned> obsCount(maxObsCount);
+        getNodeObsCountDistro(obsCount);
+        os << "NodeObservationCount:\n";
+        for (unsigned binIndex(0); binIndex<maxObsCount; ++binIndex)
+        {
+            // the label of the final bin is printed with a trailing '+':
+            const bool isLastBin((binIndex+1) == maxObsCount);
+            os << binIndex << (isLastBin ? "+" : "") << sep << obsCount[binIndex] << "\n";
+        }
+    }
+}
+
+
+void
+SVLocusSet::
+dumpLocusStats(std::ostream& os) const
+{
+    static const char sep('\t');
+
+    os << "locusIndex"
+       << sep << "nodeCount"
+       << sep << "nodeObsCount"
+       << sep << "maxNodeObsCount"
+       << sep << "regionSize"
+       << sep << "maxRegionSize"
+       << sep << "edgeCount"
+       << sep << "maxEdgeCount"
+       << sep << "edgeObsCount"
+       << sep << "maxEdgeObsCount"
+       << '\n';
+    // one row per locus slot in storage order (empty loci are not skipped); each row holds sums and per-node/per-edge maxima:
+    LocusIndexType locusIndex(0);
+    for (const SVLocus& locus : _loci)
+    {
+        unsigned locusNodeObsCount(0), maxNodeObsCount(0);
+        unsigned locusRegionSize(0), maxRegionSize(0);
+        unsigned locusEdgeCount(0), maxEdgeCount(0), locusEdgeObsCount(0), maxEdgeObsCount(0);
+        for (const SVLocusNode& node : locus)
+        {
+            // nodes:
+            const unsigned nodeObsCount(node.outCount());
+            locusNodeObsCount += nodeObsCount;
+            if (nodeObsCount > maxNodeObsCount) maxNodeObsCount = nodeObsCount;
+
+            // regions:
+            const unsigned regionSize(node.getInterval().range.size());
+            locusRegionSize += regionSize;
+            if (regionSize > maxRegionSize) maxRegionSize = regionSize;
+
+            // edges:
+            locusEdgeCount += node.size();
+            if (node.size() > maxEdgeCount) maxEdgeCount = node.size();
+            const SVLocusEdgeManager edgeManager(node.getEdgeManager());
+            for (const SVLocusEdgesType::value_type& edgeEntry : edgeManager.getMap())
+            {
+                const unsigned edgeObsCount(edgeEntry.second.getCount());
+                locusEdgeObsCount += edgeObsCount;
+                if (edgeObsCount > maxEdgeObsCount) maxEdgeObsCount = edgeObsCount;
+            }
+        }
+        os << locusIndex
+           << sep << locus.size()
+           << sep << locusNodeObsCount
+           << sep << maxNodeObsCount
+           << sep << locusRegionSize
+           << sep << maxRegionSize
+           << sep << locusEdgeCount
+           << sep << maxEdgeCount
+           << sep << locusEdgeObsCount
+           << sep << maxEdgeObsCount
+           << "\n";
+        ++locusIndex;
+    }
+}
+
+
+
+void
+SVLocusSet::
+save(const char* filename) const
+{
+    using namespace boost::archive;
+
+    assert(nullptr != filename);
+    std::ofstream ofs(filename, std::ios::binary);
+    binary_oarchive oa(ofs);
+
+    // fixed-order preamble; SVLocusSet::load reads these fields back in this same order:
+    oa << header
+       << _opt
+       << _isFinalized
+       << _totalCleaned
+       << _counts
+       << _highestSearchCount
+       << _highestSearchDensity
+       << _isMaxSearchCount
+       << _isMaxSearchDensity
+       << _buildTime
+       << _mergeTime;
+    // the loci follow the preamble; only non-empty loci are serialized:
+    for (const SVLocus& locus : _loci)
+    {
+        if (! locus.empty()) oa << locus;
+    }
+}
+
+
+
+void
+SVLocusSet::
+load(
+    const char* filename,
+    const bool isSkipIndex)
+{
+    using namespace boost::archive;
+
+#ifdef DEBUG_SVL
+    log_os << "SVLocusSet::load BEGIN\n";
+#endif
+
+    clear();
+
+    assert(nullptr != filename);
+    std::ifstream ifs(filename, std::ios::binary);
+    binary_iarchive ia(ifs);
+    _source=filename;
+
+    // preamble fields, in the same order in which SVLocusSet::save writes them:
+    ia >> header
+       >> _opt
+       >> _isFinalized
+       >> _totalCleaned
+       >> _counts
+       >> _highestSearchCount
+       >> _highestSearchDensity
+       >> _isMaxSearchCount
+       >> _isMaxSearchDensity
+       >> _buildTime
+       >> _mergeTime;
+
+    // read loci until the stream is exhausted; one scratch locus is reused and empty loci are dropped:
+    SVLocus locus;
+    while (ifs.peek() != EOF)
+    {
+        locus.clear(this);
+        ia >> locus;
+        if (locus.empty()) continue;
+        _loci.push_back(locus);
+        _loci.back().updateIndex(size()-1);
+    }
+
+    // either leave the set unindexed, or rebuild the node index and validate the result:
+    if (isSkipIndex)
+    {
+        _isIndexed = false;
+    }
+    else
+    {
+        reconstructIndex();
+        checkState(true,true);
+    }
+
+#ifdef DEBUG_SVL
+    log_os << "SVLocusSet::load END\n";
+#endif
+}
+
+
+
+void
+SVLocusSet::
+reconstructIndex()
+{
+#ifdef DEBUG_SVL
+    log_os << "reconstructIndex BEGIN\n";
+#endif
+    clearIndex();
+
+#ifdef DEBUG_SVL
+    log_os << "reconstructIndex cleared\n";
+#endif
+
+    // re-register every node of every locus in the node index, and every empty locus in _emptyLoci:
+    for (LocusIndexType locusIndex(0); locusIndex<_loci.size(); ++locusIndex)
+    {
+        const SVLocus& locus(_loci[locusIndex]);
+        const unsigned nodeCount(locus.size());
+        for (NodeIndexType nodeIndex(0); nodeIndex<nodeCount; ++nodeIndex)
+        {
+            const NodeAddressType addy(locusIndex,nodeIndex);
+            _inodes.data().insert(addy);
+            updateMaxRegionSize(getNode(addy).getInterval());
+        }
+        if (locus.empty()) _emptyLoci.insert(locusIndex);
+    }
+
+    _isIndexed=true;
+
+#ifdef DEBUG_SVL
+    log_os << "reconstructIndex END\n";
+#endif
+}
+
+
+
+void
+SVLocusSet::
+dumpIndex(std::ostream& os) const
+{
+    assert(_isIndexed);
+    // one line per indexed node address, in the node index's own order, between start/end marker lines:
+    os << "SVLocusSet Index START\n";
+    for (LocusSetIndexerType::const_iterator addyIter(_inodes.data().begin()); addyIter != _inodes.data().end(); ++addyIter)
+    {
+        os << "SVNodeIndex: " << *addyIter << "\n";
+    }
+    os << "SVLocusSet Index END\n";
+}
+
+
+
+void
+SVLocusSet::
+checkState(
+    const bool isCheckOverlap,
+    const bool isCheckLocusConnected) const
+{
+    using namespace illumina::common;
+
+    assert(_isIndexed);
+
+    unsigned locusIndex(0);
+    unsigned checkStateTotalNodeCount(0);
+    for (const SVLocus& locus : _loci)
+    {
+        locus.checkState(isCheckLocusConnected);
+        // tally the nodes of all loci for the index size cross-check after this loop:
+        const unsigned nodeCount(locus.size());
+        checkStateTotalNodeCount += nodeCount;
+        // every locus without nodes must be registered in _emptyLoci:
+        if (nodeCount == 0)
+        {
+            if (_emptyLoci.count(locusIndex) == 0)
+            {
+                std::ostringstream oss;
+                oss << "ERROR: empty locus is not updated in the empty index. Locus index: " << locusIndex << "\n";
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+        }
+        // every node must be found in the node index under its own (locus,node) address:
+        for (NodeIndexType nodeIndex(0); nodeIndex<nodeCount; ++nodeIndex)
+        {
+            LocusSetIndexerType::const_iterator citer(_inodes.data().find(std::make_pair(locusIndex,nodeIndex)));
+            if (citer == _inodes.data().end())
+            {
+                std::ostringstream oss;
+                oss << "ERROR: locus node is missing from node index\n"
+                    << "\tNode index: " << locusIndex << " node: " << getNode(std::make_pair(locusIndex,nodeIndex));
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+            if ((citer->first != locusIndex) || (citer->second != nodeIndex))
+            {
+                std::ostringstream oss;
+                oss << "ERROR: locus node has conflicting index number in node index\n"
+                    << "\tinode index_value: " << citer->first << ":" << citer->second << "\n"
+                    << "\tNode index: " << locusIndex << ":" << nodeIndex << " node: " << getNode(std::make_pair(locusIndex,nodeIndex));
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+        }
+        locusIndex++;
+    }
+
+    if (checkStateTotalNodeCount != _inodes.data().size())
+    {
+        // the node index must hold exactly one entry per node counted above:
+        std::ostringstream oss;
+        oss << "ERROR: SVLocusSet conflicting internal node counts. TotalNodeCount: " << checkStateTotalNodeCount << " inodeSize: " << _inodes.data().size() << "\n";
+        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+    }
+
+    if (! isCheckOverlap) return;
+
+    // if isOverlapAllowed() then we should expect noise nodes to overlap, but we can still check signal nodes:
+    const bool isFilterNoise(isOverlapAllowed());
+    checkForOverlapNodes(isFilterNoise);
+}
+
+
+
+#if 0
+void
+SVLocusSet::
+compressSingletonNodes() const
+{
+    using namespace illumina::common;
+    // disabled ('#if 0') draft: walks the non-noise singleton nodes in node index order
+    bool isFirst(true);
+    GenomeInterval lastInterval;
+    NodeAddressType lastAddy;
+    for (const NodeAddressType& addy : _inodes.data())
+    {
+        if (isNoiseNode(addy)) continue;
+
+        if (! isSingletonNode(addy)) continue;
+
+        const GenomeInterval& interval(getNode(addy).getInterval());
+
+        // don't allow zero-length or negative intervals:
+        assert(interval.range.begin_pos() < interval.range.end_pos());
+
+        // NOTE(review): no compression is implemented here -- the code below only throws when consecutive singleton nodes overlap:
+        if (isFirst)
+        {
+            isFirst=false;
+        }
+        else if (interval.tid == lastInterval.tid)
+        {
+            if (lastInterval.range.end_pos() > interval.range.begin_pos())
+            {
+                std::ostringstream oss;
+                oss << "ERROR: Overlapping nodes in graph\n"
+                    << "\tlast_index: " << lastAddy << " interval: " << lastInterval << "\n"
+                    << "\tthis_index: " << addy << " interval: " << interval << "\n"
+                    << "\tlast_node: " << lastAddy << " "<< getNode(lastAddy) << "\n"
+                    << "\tthis_node: " << addy << " "<< getNode(addy) << "\n"
+                    << "\n"
+                    << header << "\n";
+                BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+            }
+        }
+        lastAddy = addy;
+        lastInterval = interval;
+    }
+}
+#endif
+
+
+
+void
+SVLocusSet::
+checkForOverlapNodes(
+    const bool isFilterNoise) const
+{
+    using namespace illumina::common;
+
+    // nodes are visited in the order of the node index (_inodes); each visited node is
+    // compared against the previously visited (unfiltered) node only
+    bool hasPrevious(false);
+    GenomeInterval lastInterval;
+    NodeAddressType lastAddy;
+    for (const NodeAddressType& addy : _inodes.data())
+    {
+        // optionally leave noise nodes out of the comparison:
+        if (isFilterNoise && isNoiseNode(addy)) continue;
+
+        const GenomeInterval& interval(getNode(addy).getInterval());
+
+        // don't allow zero-length or negative intervals:
+        assert(interval.range.begin_pos() < interval.range.end_pos());
+
+        // don't allow overlapping intervals: a previous node on the same tid
+        // must end at or before the start of this one
+        const bool isOverlap(hasPrevious &&
+                             (interval.tid == lastInterval.tid) &&
+                             (lastInterval.range.end_pos() > interval.range.begin_pos()));
+        if (isOverlap)
+        {
+            std::ostringstream oss;
+            oss << "ERROR: Overlapping nodes in graph\n"
+                << "\tlast_index: " << lastAddy << " interval: " << lastInterval << "\n"
+                << "\tthis_index: " << addy << " interval: " << interval << "\n"
+                << "\tlast_node: " << lastAddy << " "<< getNode(lastAddy) << "\n"
+                << "\tthis_node: " << addy << " "<< getNode(addy) << "\n"
+                << "\n"
+                << header << "\n";
+            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
+        }
+
+        // this node becomes the reference for the next visited node:
+        hasPrevious = true;
+        lastAddy = addy;
+        lastInterval = interval;
+    }
+}
+
diff --git a/src/c++/lib/svgraph/SVLocusSet.hh b/src/c++/lib/svgraph/SVLocusSet.hh
new file mode 100644
index 0000000..7a379b3
--- /dev/null
+++ b/src/c++/lib/svgraph/SVLocusSet.hh
@@ -0,0 +1,688 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "blt_util/RegionSum.hh"
+#include "blt_util/time_util.hh"
+#include "htsapi/bam_header_info.hh"
+#include "manta/SVBreakend.hh"
+#include "svgraph/SVLocusSampleCounts.hh"
+#include "options/SVLocusSetOptions.hh"
+#include "svgraph/SVLocus.hh"
+
+#include <algorithm>
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+
+#ifdef DEBUG_SVL
+#include <iostream>
+#include "blt_util/log.hh"
+#endif
+
+
+/// A set of SVLocus objects comprising a full locus graph
+///
+/// When finalized, the SVLocusSet contains non-overlapping SVLoci
+///
+struct SVLocusSet : public flyweight_observer<SVLocusNodeMoveMessage>
+{
+    typedef std::vector<SVLocus> locusset_type;
+    typedef locusset_type::const_iterator const_iterator;
+
+    explicit
+    SVLocusSet(
+        const SVLocusSetOptions& opt = SVLocusSetOptions()) :
+        _opt(opt),
+        _inodes(*this),
+        _source("UNKNOWN"),
+        _isFinalized(false),
+        _totalCleaned(0),
+        _highestSearchCount(0),
+        _highestSearchDensity(0),
+        _isMaxSearchCount(false),
+        _isMaxSearchDensity(false),
+        _isIndexed(true)
+    {}
+
+    bool
+    empty() const
+    {
+        return _loci.empty();
+    }
+
+    unsigned
+    size() const
+    {
+        return _loci.size();
+    }
+
+    unsigned
+    nonEmptySize() const
+    {
+        assert(_isIndexed);
+        return size()-_emptyLoci.size();
+    }
+
+    const_iterator
+    begin() const
+    {
+        return _loci.begin();
+    }
+
+    const_iterator
+    end() const
+    {
+        return _loci.end();
+    }
+
+    const SVLocus&
+    getLocus(const LocusIndexType index) const
+    {
+#ifdef DEBUG_SVL
+        if (index>=_loci.size()) locusHurl(index,"const");
+#endif
+
+        assert(index<_loci.size());
+        return _loci[index];
+    }
+
+    /// merge locus into this:
+    ///
+    /// locus is destroyed in this process
+    ///
+    void
+    merge(const SVLocus& locus);
+
+    /// merge locus set into this:
+    ///
+    /// locus set is destroyed in this process
+    ///
+    void
+    merge(const SVLocusSet& set);
+
+    void
+    clear()
+    {
+        _loci.clear();
+        clearIndex();
+        _isFinalized=false;
+        _totalCleaned=0;
+        _counts.clear();
+        _highestSearchCount=0;
+        _highestSearchDensity=0;
+
+        _isMaxSearchCount=false;
+        _isMaxSearchDensity=false;
+
+        _isIndexed=true;
+    }
+
+    /// indicate that the set is complete
+    void
+    finalize()
+    {
+        clean();
+        _isFinalized=true;
+    }
+
+    /// remove all existing edges with less than minMergeEdgeCount support:
+    void
+    clean();
+
+    void
+    cleanRegion(const GenomeInterval interval);
+
+    unsigned
+    totalCleaned() const
+    {
+        return _totalCleaned;
+    }
+
+    // binary serialization
+    void
+    save(const char* filename) const;
+
+    /// restore from serialization
+    ///
+    /// \param[in] isSkipIndex if true, don't build the graph index, and only allow a limited set of operations:
+    ///
+    void
+    load(
+        const char* filename,
+        const bool isSkipIndex = false);
+
+    // debug output
+    void
+    dump(std::ostream& os) const;
+
+    // debug output
+    void
+    dumpRegion(
+        std::ostream& os,
+        const GenomeInterval interval);
+
+    // dump stats on the whole SVLocus set:
+    void
+    dumpStats(std::ostream& os) const;
+
+    // dump stats on each locus in tsv format:
+    void
+    dumpLocusStats(std::ostream& os) const;
+
+    const std::string&
+    getSource() const
+    {
+        return _source;
+    }
+
+    unsigned
+    getMinMergeEdgeCount() const
+    {
+        return _opt.getMinMergeEdgeCount();
+    }
+
+    // total number of reads used as supporting evidence in the graph
+    unsigned
+    totalObservationCount() const
+    {
+        unsigned sum(0);
+        for (const SVLocus& locus : *this)
+        {
+            sum += locus.totalObservationCount();
+        }
+        return sum;
+    }
+
+    // total nodes in the graph
+    unsigned
+    totalNodeCount() const
+    {
+        unsigned sum(0);
+        for (const SVLocus& locus : *this)
+        {
+            sum += locus.size();
+        }
+        return sum;
+    }
+
+    /// get total number of directed edges in the graph
+    unsigned
+    totalEdgeCount() const
+    {
+        unsigned sum(0);
+        for (const SVLocus& locus : *this)
+        {
+            sum += locus.totalEdgeCount();
+        }
+        return sum;
+    }
+
+    /// get total number of self-edges in the graph
+    unsigned
+    selfEdgeCount() const
+    {
+        unsigned sum(0);
+        for (const SVLocus& locus : *this)
+        {
+            sum += locus.selfEdgeCount();
+        }
+        return sum;
+    }
+
+    /// fill node edge count histogram up to edgeCount.size()
+    void
+    getNodeEdgeCountDistro(std::vector<unsigned>& edgeCount) const
+    {
+        for (const SVLocus& locus : *this)
+        {
+            locus.getNodeEdgeCountDistro(edgeCount);
+        }
+    }
+
+    /// fill node observation count histogram up to obsCount.size()
+    void
+    getNodeObsCountDistro(std::vector<unsigned>& obsCount) const
+    {
+        for (const SVLocus& locus : *this)
+        {
+            locus.getNodeObsCountDistro(obsCount);
+        }
+    }
+
+    /// check that internal data-structures are in
+    /// a consistent state, throw on error
+    void
+    checkState(
+        const bool isCheckOverlap = false,
+        const bool isCheckLocusConnected = false) const;
+
+    /// updater gets direct access to read counts:
+    AllCounts&
+    getCounts()
+    {
+        return _counts;
+    }
+
+    const AllCounts&
+    getCounts() const
+    {
+        return _counts;
+    }
+
+    void
+    setBuildTime(
+        const CpuTimes& t)
+    {
+        _buildTime = t;
+    }
+
+    void
+    setMergeTime(
+        const CpuTimes& t)
+    {
+        _mergeTime = t;
+    }
+
+    typedef std::pair<LocusIndexType,NodeIndexType> NodeAddressType;
+
+    /// get all nodes in this object which intersect with
+    /// an external node
+    ///
+    /// this is effectively const
+    void
+    getRegionIntersect(
+        const GenomeInterval interval,
+        std::set<NodeAddressType>& intersectNodes);
+
+private:
+
+    typedef NodeAddressType EdgeMapKeyType;
+    typedef NodeIndexType EdgeMapValueType;
+
+    typedef std::multimap<EdgeMapKeyType, EdgeMapValueType> EdgeMapType;
+
+    typedef std::pair<EdgeMapKeyType, EdgeMapValueType> EdgeInfoType;
+
+    /// orders node addresses by the genome interval of the addressed node, then by the address itself
+    struct NodeAddressSorter
+    {
+        NodeAddressSorter(const SVLocusSet& set) : _set(set) {}
+
+        bool
+        operator()(
+            const NodeAddressType& a,
+            const NodeAddressType& b) const
+        {
+            const GenomeInterval& aInterval(getInterval(a));
+            const GenomeInterval& bInterval(getInterval(b));
+            if (aInterval<bInterval) return true;
+            // equal intervals fall back to comparing the (locus,node) index pairs
+            return ((aInterval==bInterval) && (a<b));
+        }
+
+    private:
+        /// interval of the node which address n refers to within _set
+        const GenomeInterval&
+        getInterval(const NodeAddressType& n) const
+        {
+            return (_set.getLocus(n.first).getNode(n.second).getInterval());
+        }
+
+        // set used to resolve addresses into nodes (held by reference)
+        const SVLocusSet& _set;
+    };
+
+    // wrap this set in an object b/c special copy semantics are required: copy-ctor is deleted, assignment copies entries only
+    struct LocusSetIndexerType
+    {
+        typedef std::set<NodeAddressType, NodeAddressSorter> data_t;
+        typedef data_t::iterator iterator;
+        typedef data_t::const_iterator const_iterator;
+
+        LocusSetIndexerType(const SVLocusSet& set)
+            : _data(NodeAddressSorter(set)) // the sorter resolves node addresses through 'set'
+        {}
+
+        LocusSetIndexerType(const LocusSetIndexerType& rhs) = delete;
+
+        LocusSetIndexerType& operator=(const LocusSetIndexerType& rhs)
+        {
+            if (this == &rhs) return *this;
+            _data.clear(); // clear+insert (rather than assigning _data) keeps this object's own sorter
+            _data.insert(rhs._data.begin(),rhs._data.end());
+            return *this;
+        }
+
+        data_t&
+        data()
+        {
+            return _data;
+        }
+
+        const data_t&
+        data() const
+        {
+            return _data;
+        }
+
+    private:
+        data_t _data;
+    };
+
+    friend
+    std::ostream&
+    operator<<(std::ostream& os, const NodeAddressType& a);
+
+    SVLocus&
+    getLocus(const LocusIndexType index) // mutable access to the locus stored at position 'index' of _loci
+    {
+#ifdef DEBUG_SVL
+        if (index>=_loci.size()) locusHurl(index,"non-const"); // debug builds: pass a bad index to locusHurl before the assert
+#endif
+
+        assert(index<_loci.size());
+        return _loci[index];
+    }
+
+    void
+    locusHurl(const LocusIndexType index, const char* label) const;
+
+
+    const SVLocusNode&
+    getNode(const NodeAddressType n) const // n is a (locus index, node index) pair
+    {
+        return getLocus(n.first).getNode(n.second);
+    }
+
+    void
+    clearLocus(const LocusIndexType index) // empty the locus at 'index' and record its slot as empty
+    {
+#ifdef DEBUG_SVL
+        log_os << "SVLocusSet::clearLocus index: " << index << "\n";
+#endif
+        assert(index<_loci.size());
+
+        _loci[index].clear(this); // NOTE(review): 'this' is presumably passed so the locus can notify this set - confirm
+        _emptyLoci.insert(index);
+        _source="UNKNOWN"; // the debug source label is reset whenever a locus is cleared
+    }
+
+    /// shared node intersection utility
+    ///
+    /// \param[in] searchNodes the set of nodes to search for intersections in
+    /// \param[in] filterLocusIndex ignore intersections from this locus
+    ///
+    /// \param[in] isTestUsability check whether a node intersection exceeds computability limits
+    ///
+    /// \return is usable node (can only be false when isTestUsability is true)
+    ///
+    bool
+    getNodeIntersectCore(
+        const LocusIndexType inputLocusIndex,
+        const NodeIndexType inputNodeIndex,
+        const LocusSetIndexerType& searchNodes,
+        const LocusIndexType filterLocusIndex,
+        std::set<NodeAddressType>& intersectNodes,
+        const bool isTestUsability = false) const;
+
+    /// get all indexed nodes in this object which intersect with the input node, ignoring the input node's own locus
+    ///
+    /// \param[in] isTestUsability check whether a node intersection exceeds computability limits
+    ///
+    /// \return is usable node (can only be false when isTestUsability is true)
+    ///
+    bool
+    getNodeIntersect(
+        const LocusIndexType locusIndex,
+        const NodeIndexType nodeIndex,
+        std::set<NodeAddressType>& intersectNodes,
+        const bool isTestUsability = false) const
+    {
+        return getNodeIntersectCore(locusIndex, nodeIndex, _inodes, locusIndex, intersectNodes, isTestUsability);
+    }
+
+    /// edges returned are in local_addy->remote_node orientation
+    void
+    getIntersectingEdgeNodes(
+        const LocusIndexType inputLocusIndex,
+        const NodeIndexType inputRemoteNodeIndex,
+        const EdgeMapType& remoteIntersectNodeToLocalNodeMap,
+        const LocusSetIndexerType& remoteIntersectNodes,
+        std::vector<EdgeInfoType>& edges) const;
+
+    /// find nodes which could be merged with the input node, accounting for edge overlap and noise thresholds
+    ///
+    /// \param[in] isInputLocusMoved has the input locus been moved into the graph from an initial temporary locus?
+    /// \param[out] mergeIntersect nodes which could be merged with input
+    ///
+    void
+    getNodeMergeableIntersect(
+        const LocusIndexType inputLocusIndex,
+        const NodeIndexType inputNodeIndex,
+        const bool isInputLocusMoved,
+        std::set<NodeAddressType>& mergeIntersect) const;
+
+    /// assign all intersect clusters to the lowest index number that is not startLocusIndex
+    ///
+    void
+    moveIntersectToLowIndex(
+        const std::set<NodeAddressType>& intersectNodes,
+        const LocusIndexType startLocusIndex,
+        LocusIndexType& locusIndex);
+
+    /// combine all content from 'from' locus into 'to' locus
+    ///
+    /// this is typically required when a node is merged
+    /// which combines two loci
+    void
+    combineLoci(
+        const LocusIndexType fromIndex,
+        const LocusIndexType toIndex,
+        const bool isClearSource = true);
+
+
+    /// add locus to this locusSet (intermediate step in merging)
+    LocusIndexType
+    insertLocus(
+        const SVLocus& inputLocus);
+
+    void
+    removeNode(const NodeAddressType inputNodePtr)
+    {
+        assert(_isIndexed);
+
+        // addresses which are not present in the node index are silently ignored
+        if (_inodes.data().count(inputNodePtr) == 0) return;
+
+        SVLocus& owningLocus(getLocus(inputNodePtr.first));
+        owningLocus.eraseNode(inputNodePtr.second, this);
+    }
+
+    bool
+    isNoiseNode(const NodeAddressType inputAddy) const // delegates to the addressed node's locus
+    {
+        return getLocus(inputAddy.first).isNoiseNode(getMinMergeEdgeCount(),inputAddy.second);
+    }
+
+    /// is this a node of size 1 which has an edge to its own node index?
+    bool
+    isSingletonNode(const NodeAddressType inputAddy) const
+    {
+        const SVLocusNode& node(getNode(inputAddy));
+        return ((node.size() == 1) && node.isEdge(inputAddy.second));
+    }
+
+    bool
+    isOverlapAllowed() const
+    {
+        return (! _isFinalized); // overlapping regions are only allowed until the graph is finalized
+    }
+
+    void
+    mergeNodePtr(
+        NodeAddressType fromPtr,
+        NodeAddressType toPtr);
+
+    /// update the node index (_inodes) when notified that a node was added (msg.first true) or deleted (false):
+    void
+    recieve_flyweight_notification(const SVLocusNodeMoveMessage& msg)
+    {
+        assert(_isIndexed);
+
+        if (msg.first)
+        {
+            // add: index the node address, then account for its interval in the max region size table
+#ifdef DEBUG_SVL
+            log_os << "SVLocusSetObserver: Adding node: " << msg.second.first << ":" << msg.second.second << "\n";
+#endif
+            _inodes.data().insert(msg.second);
+            updateMaxRegionSize(getNode(msg.second).getInterval());
+        }
+        else
+        {
+            // delete: drop the node address from the index (max region sizes are left unchanged)
+#ifdef DEBUG_SVL
+            log_os << "SVLocusSetObserver: Deleting node: " << msg.second.first << ":" << msg.second.second << "\n";
+#endif
+            _inodes.data().erase(msg.second);
+        }
+    }
+
+
+    void
+    updateMaxRegionSize(const GenomeInterval& interval)
+    {
+        assert(interval.tid>=0);
+        const unsigned tid(interval.tid);
+        // grow the per-chromosome table (zero-filled) so that tid is a valid index
+        if (_maxRegionSize.size() <= tid) _maxRegionSize.resize((tid+1),0);
+        // keep the largest region size seen so far for this tid
+        unsigned& maxSize(_maxRegionSize[tid]);
+        maxSize = std::max(maxSize, interval.range.size());
+    }
+
+
+    void
+    reconstructIndex();
+
+    void
+    clearIndex()
+    {
+        _emptyLoci.clear(); // forget which locus slots are empty
+        _inodes.data().clear(); // drop every indexed node address
+        _maxRegionSize.clear(); // drop the per-chromosome maximum region sizes
+    }
+
+    void
+    dumpIndex(std::ostream& os) const;
+
+    /// throw an exception if any nodes are overlapping
+    ///
+    /// if isFilterNoise is true, consider only signal nodes
+    void
+    checkForOverlapNodes(
+        const bool isFilterNoise) const;
+
+    /// look for non-noise nodes intersecting the findSignalAddy node
+    ///
+    /// Noise nodes are checked for intersection to inputIntersectRemotes,
+    /// if found isIntersectRemotes is set to true
+    ///
+    void
+    findSignalNodes(
+        const LocusIndexType inputLocusIndex,
+        const NodeAddressType findSignalAddy,
+        std::set<NodeAddressType>& signalIntersectNodes,
+        const std::set<NodeAddressType>& inputIntersectRemotes,
+        bool& isIntersectRemotes) const;
+
+    ///////////////////// data
+
+public:
+    bam_header_info header;
+private:
+
+
+    struct MergeRegionSumData
+    {
+        void
+        clear() // reset all four region sums below
+        {
+            localNodeOutbound.clear();
+            localNodeInbound.clear();
+            remoteNodeOutbound.clear();
+            remoteNodeInbound.clear();
+        }
+
+        // total counts for this edge:
+        using rsum_t = RegionSum<unsigned>;
+        rsum_t localNodeOutbound;
+        rsum_t localNodeInbound;
+        rsum_t remoteNodeOutbound;
+        rsum_t remoteNodeInbound;
+    };
+    SVLocusSetOptions _opt;
+
+    // contains the full set of loci
+    locusset_type _loci;
+    std::set<unsigned> _emptyLoci;
+
+    // provides an intersection search of overlapping nodes given a bound node size:
+    LocusSetIndexerType _inodes;
+
+    // maximum region size per chromosome:
+    std::vector<unsigned> _maxRegionSize;
+
+    // simple debug string describing the source of this
+    std::string _source;
+
+    // the graph has intermediate states (during build) when overlapping regions are allowed,
+    // once complete, overlaps are not present and disallowed:
+    bool _isFinalized;
+
+    AllCounts _counts;
+
+    // total number of observations removed on edges with less than minMergeEdgeCount counts
+    unsigned _totalCleaned;
+
+    mutable unsigned _highestSearchCount; ///< highest search count observed during graph build
+    mutable float _highestSearchDensity; ///< highest node density observed during graph build
+
+    mutable bool _isMaxSearchCount; ///< has input been filtered because we hit the maximum search count
+    mutable bool _isMaxSearchDensity; ///< has input been filtered because we hit the maximum node density
+
+    bool _isIndexed;
+
+    CpuTimes _buildTime;
+    CpuTimes _mergeTime;
+
+    mutable MergeRegionSumData _mergeRegions;
+};
+
+
+std::ostream&
+operator<<(std::ostream& os, const SVLocusSet::NodeAddressType& a);
diff --git a/src/c++/lib/svgraph/test/CMakeLists.txt b/src/c++/lib/svgraph/test/CMakeLists.txt
new file mode 100644
index 0000000..83c64d7
--- /dev/null
+++ b/src/c++/lib/svgraph/test/CMakeLists.txt
@@ -0,0 +1,28 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+include(${THIS_CXX_TEST_LIBRARY_CMAKE})
diff --git a/src/c++/lib/svgraph/test/GenomeIntervalTest.cpp b/src/c++/lib/svgraph/test/GenomeIntervalTest.cpp
new file mode 100644
index 0000000..7c7849d
--- /dev/null
+++ b/src/c++/lib/svgraph/test/GenomeIntervalTest.cpp
@@ -0,0 +1,53 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "svgraph/GenomeInterval.hh"
+
+
+
+BOOST_AUTO_TEST_SUITE( test_GenomeInterval )
+
+BOOST_AUTO_TEST_CASE( test_GenomeInterval )
+{
+
+    // test that GenomeInterval sorting follows the expected order:
+    std::vector<GenomeInterval> test;
+
+    test.emplace_back(1,15,19);
+    test.emplace_back(1,15,22);
+    test.emplace_back(1,10,20);
+    test.emplace_back(2,5,10);
+    test.emplace_back(2,8,10);
+
+    std::sort(test.begin(),test.end());
+
+    BOOST_REQUIRE_EQUAL(test[0],GenomeInterval(1,10,20));
+    BOOST_REQUIRE_EQUAL(test[2],GenomeInterval(1,15,22));
+    BOOST_REQUIRE_EQUAL(test[4],GenomeInterval(2,8,10));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/svgraph/test/GenomeIntervalUtilTest.cpp b/src/c++/lib/svgraph/test/GenomeIntervalUtilTest.cpp
new file mode 100644
index 0000000..54be847
--- /dev/null
+++ b/src/c++/lib/svgraph/test/GenomeIntervalUtilTest.cpp
@@ -0,0 +1,58 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "svgraph/GenomeIntervalUtil.hh"
+
+
+
+BOOST_AUTO_TEST_SUITE( test_GenomeIntervalUtil )
+
+BOOST_AUTO_TEST_CASE( test_IntervalCompressor )
+{
+    // test that intervalCompressor merges overlapping intervals in place and returns an input->output index map:
+    std::vector<GenomeInterval> test;
+
+    test.push_back(GenomeInterval(1,15,19));
+    test.push_back(GenomeInterval(2,5,10));
+    test.push_back(GenomeInterval(1,10,20));
+    test.push_back(GenomeInterval(1,24,50));
+    test.push_back(GenomeInterval(2,8,10));
+    test.push_back(GenomeInterval(1,15,22));
+
+    const std::vector<unsigned> indexMap = intervalCompressor(test);
+
+    BOOST_REQUIRE_EQUAL(test.size(),3u); // six input intervals compress to three
+    BOOST_REQUIRE_EQUAL(test[0],GenomeInterval(1,10,22));
+    BOOST_REQUIRE_EQUAL(test[1],GenomeInterval(2,5,10));
+
+    BOOST_REQUIRE_EQUAL(indexMap.size(),6u); // one map entry per input interval
+    BOOST_REQUIRE_EQUAL(indexMap[0],0u);
+    BOOST_REQUIRE_EQUAL(indexMap[5],0u);
+    BOOST_REQUIRE_EQUAL(indexMap[4],1u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/svgraph/test/SVLocusNodeTest.cpp b/src/c++/lib/svgraph/test/SVLocusNodeTest.cpp
new file mode 100644
index 0000000..cd7a386
--- /dev/null
+++ b/src/c++/lib/svgraph/test/SVLocusNodeTest.cpp
@@ -0,0 +1,57 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "svgraph/SVLocus.hh"
+
+#include "SVLocusTestUtil.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocusNode )
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNode_EM )
+{
+    // test the edge manager of SVLocusNode on a locus reduced to one self-linked node
+    SVLocus locus1;
+    NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    NodeIndexType nodePtr1copy = locus1.addNode(GenomeInterval(1,10,20));
+    locus1.linkNodes(nodePtr1,nodePtr1copy,1,1);
+    locus1.mergeSelfOverlap(); // the two identical-interval nodes are expected to collapse into one
+
+    BOOST_REQUIRE_EQUAL(locus1.size(),1u);
+
+    const SVLocusNode& node(static_cast<const SVLocus&>(locus1).getNode(0)); // cast selects the const getNode overload
+
+    const SVLocusEdgeManager em = node.getEdgeManager();
+
+    BOOST_REQUIRE_EQUAL(em.getMap().size(),1u);
+
+    BOOST_REQUIRE_EQUAL(em.getMap().begin()->first,0u); // the single remaining edge targets node 0, i.e. the node itself
+    BOOST_REQUIRE_EQUAL(em.getMap().begin()->second.getCount(),1u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/c++/lib/svgraph/test/SVLocusSerializeTest.cpp b/src/c++/lib/svgraph/test/SVLocusSerializeTest.cpp
new file mode 100644
index 0000000..fb7d459
--- /dev/null
+++ b/src/c++/lib/svgraph/test/SVLocusSerializeTest.cpp
@@ -0,0 +1,173 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/archive/tmpdir.hpp"
+#include "boost/archive/text_iarchive.hpp"
+#include "boost/archive/text_oarchive.hpp"
+#include "boost/archive/binary_iarchive.hpp"
+#include "boost/archive/binary_oarchive.hpp"
+#include "boost/test/unit_test.hpp"
+
+#include "svgraph/SVLocus.hh"
+
+#include "SVLocusTestUtil.hh"
+
+#include <fstream>
+
+using namespace boost::archive;
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocusSerialize )
+
+// round-trip a GenomeInterval through the given archive types, as the simplest serialization check:
+//
+template <typename InputArchiver, typename OutputArchiver>
+void
+GenomeIntervalSerializeTest(const char* extension)
+{
+    // construct a simple interval to write out and read back
+    GenomeInterval gi(1,10,20);
+
+    // archive file is <boost archive tmpdir>/testfile<extension>
+    const std::string tmpRoot(boost::archive::tmpdir());
+    const std::string filename(tmpRoot + "/testfile" + extension);
+
+    // serialize
+    {
+        std::ofstream outFile(filename.c_str(), std::ios::binary);
+        OutputArchiver writer(outFile);
+        writer << gi;
+    }
+
+    GenomeInterval gi_copy;
+
+    // deserialize
+    {
+        std::ifstream inFile(filename.c_str(), std::ios::binary);
+        InputArchiver reader(inFile);
+        reader >> gi_copy;
+    }
+
+    BOOST_REQUIRE_EQUAL(gi,gi_copy);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GenomeIntervalSerializeText )
+{
+    GenomeIntervalSerializeTest<text_iarchive,text_oarchive>(".txt");
+}
+
+
+BOOST_AUTO_TEST_CASE( test_GenomeIntervalSerializeBinary )
+{
+    GenomeIntervalSerializeTest<binary_iarchive,binary_oarchive>(".bin");
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNodeSerialze )
+{
+
+    // build a two-node locus and round-trip its first node through a binary archive
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,30,40);
+
+    const std::string tmpRoot(boost::archive::tmpdir());
+    const std::string filename(tmpRoot + "/testfile.bin");
+
+    const SVLocusNode& node1(static_cast<const SVLocus&>(locus1).getNode(0));
+
+    // serialize
+    {
+        std::ofstream outFile(filename.c_str(), std::ios::binary);
+        binary_oarchive writer(outFile);
+        writer << node1;
+    }
+
+    SVLocusNode node_copy1;
+
+    // deserialize
+    {
+        std::ifstream inFile(filename.c_str(), std::ios::binary);
+        binary_iarchive reader(inFile);
+        reader >> node_copy1;
+    }
+
+    BOOST_REQUIRE_EQUAL(node1.outCount(), node_copy1.outCount());
+    BOOST_REQUIRE_EQUAL(node1.getInterval(), node_copy1.getInterval());
+    BOOST_REQUIRE_EQUAL(node1.size() ,node_copy1.size());
+
+    const SVLocusEdgeManager origEdges(node1.getEdgeManager());
+    const SVLocusEdgeManager copyEdges(node_copy1.getEdgeManager());
+
+    const auto ibegin(origEdges.getMap().begin());
+    const auto copy_ibegin(copyEdges.getMap().begin());
+
+    BOOST_REQUIRE_EQUAL(ibegin->second.getCount(), copy_ibegin->second.getCount());
+
+    BOOST_REQUIRE_EQUAL(ibegin->first, copy_ibegin->first);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusSerialze )
+{
+
+    // build a two-node locus and round-trip the whole locus through a binary archive
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,30,40);
+
+    const std::string tmpRoot(boost::archive::tmpdir());
+    const std::string filename(tmpRoot + "/testfile.bin");
+
+    // serialize
+    {
+        std::ofstream outFile(filename.c_str(), std::ios::binary);
+        binary_oarchive writer(outFile);
+        writer << locus1;
+    }
+
+    SVLocus locus1_copy;
+
+    // deserialize
+    {
+        std::ifstream inFile(filename.c_str(), std::ios::binary);
+        binary_iarchive reader(inFile);
+        reader >> locus1_copy;
+    }
+
+    BOOST_REQUIRE_EQUAL(locus1.size(),locus1_copy.size());
+    const SVLocus& clocus1(locus1);
+    const SVLocus& clocus1_copy(locus1_copy);
+
+    bool isMatchFound(false);
+    for (const SVLocusNode& nodeCopy : clocus1_copy)
+    {
+        isMatchFound = (isMatchFound || (nodeCopy.getInterval() == clocus1.begin()->getInterval()));
+    }
+    BOOST_REQUIRE(isMatchFound);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/svgraph/test/SVLocusSetPrivateTest.cpp b/src/c++/lib/svgraph/test/SVLocusSetPrivateTest.cpp
new file mode 100644
index 0000000..222b12a
--- /dev/null
+++ b/src/c++/lib/svgraph/test/SVLocusSetPrivateTest.cpp
@@ -0,0 +1,123 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+// hack to call private methods of SVLocusSet:
+//#pragma clang diagnostic ignored "-Wkeyword-macro"
+//#define private public
+
+#include "svgraph/SVLocusSet.hh"
+
+#include "SVLocusTestUtil.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocusSetPrivate )
+
+
+static
+unsigned
+testOverlap( // helper: query locusSet with the interval (tid,beginPos,endPos)
+    SVLocusSet& locusSet,
+    const int32_t tid,
+    const int32_t beginPos,
+    const int32_t endPos)
+{
+    std::set<SVLocusSet::NodeAddressType> intersect;
+    locusSet.getRegionIntersect(GenomeInterval(tid,beginPos,endPos),intersect);
+    return intersect.size(); // number of node addresses found by getRegionIntersect
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusIntersect )
+{
+    // construct a simple two-node locus
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+
+    SVLocusSet set1;
+    set1.merge(locus1);
+    set1.checkState(true,true); // also enable the overlap and locus-connected checks
+
+    // test for various intersections:
+
+    // non-overlap test:
+    BOOST_REQUIRE_EQUAL(testOverlap(set1,1,1,2),0u);
+
+    // left-edge overlap:
+    BOOST_REQUIRE_EQUAL(testOverlap(set1,1,9,11),1u);
+
+    // right-edge overlap:
+    BOOST_REQUIRE_EQUAL(testOverlap(set1,1,19,21),1u);
+
+    // non-overlap (presumably: range touches the second node's 30-40, but that node is on tid 2):
+    BOOST_REQUIRE_EQUAL(testOverlap(set1,1,29,31),0u);
+
+    // non-overlap (diff tid):
+    BOOST_REQUIRE_EQUAL(testOverlap(set1,2,9,11),0u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusCombine )
+{
+    // test reassigning the locus numbers of non-overlapping loci in a set:
+
+    // construct three simple two-node loci, each using its own pair of tids
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+
+    SVLocus locus3;
+    locusAddPair(locus3,5,10,20,6,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+
+    set1.checkState(true,true);
+
+    const SVLocusSet& cset1(set1);
+
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u); // each input locus is expected to keep its own slot and both nodes
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(1).size(),2u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(2).size(),2u);
+#if 0 // disabled: combineLoci is private and the '#define private public' hack above is commented out
+    set1.combineLoci(0,0);
+    set1.combineLoci(2,0);
+
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),4u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(1).size(),2u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(2).size(),0u);
+#endif
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/svgraph/test/SVLocusSetSerializeTest.cpp b/src/c++/lib/svgraph/test/SVLocusSetSerializeTest.cpp
new file mode 100644
index 0000000..dd92d30
--- /dev/null
+++ b/src/c++/lib/svgraph/test/SVLocusSetSerializeTest.cpp
@@ -0,0 +1,143 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/archive/tmpdir.hpp"
+#include "boost/test/unit_test.hpp"
+
+#include "svgraph/SVLocusSet.hh"
+
+#include "SVLocusTestUtil.hh"
+
+using namespace boost::archive;
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocusSetSerialize )
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusSetSerialze )
+{
+    // build a set from two simple two-node loci
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+
+    SVLocus locus2;
+    locusAddPair(locus2,3,10,20,4,30,40);
+
+    SVLocusSet set1;
+    set1.merge(locus1);
+    set1.merge(locus2);
+
+    // the set is written to, and read back from, a file in the boost archive tmpdir
+    const std::string filename(std::string(tmpdir()) + "/testfile.bin");
+
+    // serialize
+    set1.save(filename.c_str());
+
+    SVLocusSet set1_copy;
+
+    // deserialize
+    set1_copy.load(filename.c_str());
+
+    BOOST_REQUIRE_EQUAL(set1.size(),set1_copy.size());
+
+    // the first locus of the original and of the reloaded set must have the same size:
+    const SVLocusSet::const_iterator i(set1.begin());
+    const SVLocusSet::const_iterator i_copy(set1_copy.begin());
+
+
+    const SVLocus& set1_locus1(*i);
+    const SVLocus& set1_copy_locus1(*i_copy);
+    BOOST_REQUIRE_EQUAL(set1_locus1.size(),set1_copy_locus1.size());
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusSetSerialze2 ) // merging reloaded sets should match merging the originals
+{
+    SVLocusSet set1;
+    {
+        SVLocus locus1;
+        locusAddPair(locus1,1,10,20,2,30,40);
+
+        SVLocus locus2;
+        locusAddPair(locus2,3,10,20,4,30,40);
+
+        set1.merge(locus1);
+        set1.merge(locus2);
+    }
+
+    SVLocusSet set2;
+    {
+        SVLocus locus1;
+        locusAddPair(locus1,1,15,25,4,30,40);
+
+        SVLocus locus2;
+        locusAddPair(locus2,3,30,40,2,30,40);
+
+        set2.merge(locus1);
+        set2.merge(locus2);
+    }
+
+    SVLocusSet set1_copy;
+    {
+        std::string filename(tmpdir());
+        filename += "/testfile.bin";
+
+        // serialize
+        set1.save(filename.c_str());
+
+        // deserialize
+        set1_copy.load(filename.c_str());
+    }
+
+    SVLocusSet set2_copy;
+    {
+        std::string filename(tmpdir());
+        filename += "/testfile.bin"; // same file name as used for set1: each file is reloaded right after it is written
+
+        // serialize
+        set2.save(filename.c_str());
+
+        // deserialize
+        set2_copy.load(filename.c_str());
+    }
+
+    set1.merge(set2); // merge the original sets ...
+    set1_copy.merge(set2_copy); // ... and, separately, their reloaded copies
+
+    BOOST_REQUIRE_EQUAL(set1.size(),set1_copy.size());
+
+    typedef SVLocusSet::const_iterator citer;
+
+    citer i(set1.begin());
+    citer i_copy(set1_copy.begin());
+
+    const SVLocus& set1_locus1(*i);
+    const SVLocus& set1_copy_locus1(*i_copy);
+    BOOST_REQUIRE_EQUAL(set1_locus1.size(),set1_copy_locus1.size()); // only the first locus of each merged set is compared
+}
+
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/svgraph/test/SVLocusSetTest.cpp b/src/c++/lib/svgraph/test/SVLocusSetTest.cpp
new file mode 100644
index 0000000..d204045
--- /dev/null
+++ b/src/c++/lib/svgraph/test/SVLocusSetTest.cpp
@@ -0,0 +1,1172 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "svgraph/SVLocusSet.hh"
+
+#include "SVLocusTestUtil.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocusSet )
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMerge )
+{
+    // test merge of overlapping loci: two loci built from identical intervals
+    // are expected to collapse into a single locus with two nodes
+    // construct a simple two-node locus
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,2,30,40);
+
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,20,2,30,40);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 2;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.checkState(true,true);
+    const SVLocusSet& cset1(set1); // const view used for the read-only queries below
+
+    BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMultiOverlapMerge )
+{
+    // test merge of overlapping loci, reproduces production failure
+    // locus3's second interval (12,35,55) overlaps the second intervals of both locus1 and locus2
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,12,30,40);
+
+    SVLocus locus2;
+    locusAddPair(locus2,2,10,20,12,50,60);
+
+    SVLocus locus3;
+    locusAddPair(locus3,3,10,20,12,35,55);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.checkState(true,true);
+    const SVLocusSet& cset1(set1);
+
+    GenomeInterval testInterval(12,30,60); // union of (12,30,40), (12,50,60) and (12,35,55)
+
+    BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),4u);
+
+    bool isFound(false);
+    for (const SVLocusNode& node : cset1.getLocus(0))
+    {
+        if (node.getInterval() == testInterval) isFound=true;
+    }
+    BOOST_REQUIRE(isFound); // some node of locus 0 must equal testInterval exactly
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMultiOverlapMerge2 )
+{
+    // test merge of overlapping loci, reproduces production failure
+    // locus1 links one node to two others; locus2's first interval (1,10,60) spans all three
+    SVLocus locus1;
+    {
+        NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+        NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,30,40));
+        NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(1,50,60));
+        locus1.linkNodes(nodePtr1, nodePtr2);
+        locus1.linkNodes(nodePtr1, nodePtr3);
+    }
+
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,60,2,10,60);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.checkState(true,true);
+    const SVLocusSet& cset1(set1);
+
+    GenomeInterval testInterval(1,10,60); // the spanning interval contributed by locus2
+
+    BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+
+    bool isFound(false);
+    for (const SVLocusNode& node : cset1.getLocus(0))
+    {
+        if (node.getInterval() == testInterval) isFound=true;
+    }
+    BOOST_REQUIRE(isFound); // some node of locus 0 must equal testInterval exactly
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMultiOverlapMerge3 )
+{
+    // test merge of overlapping loci, reproduces production failure
+    // locus4 (1,15,35) overlaps locus1 and locus2; locus5 (2,15,35) overlaps locus3
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,3,10,20);
+
+    SVLocus locus2;
+    locusAddPair(locus2,1,30,40,4,10,20);
+
+    SVLocus locus3;
+    locusAddPair(locus3,2,30,40,5,10,20);
+
+    SVLocus locus4;
+    locusAddPair(locus4,1,15,35,6,10,20);
+
+    SVLocus locus5;
+    locusAddPair(locus5,2,15,35,7,10,20);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    set1.merge(locus3);
+    set1.merge(locus4);
+    set1.merge(locus5);
+    set1.checkState(true,true);
+    const SVLocusSet& cset1(set1);
+
+    GenomeInterval testInterval(1,10,40); // union of (1,10,20), (1,30,40) and (1,15,35)
+
+    BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),4u);
+
+    bool isFound(false);
+    for (const SVLocusNode& node : cset1.getLocus(0))
+    {
+        if (node.getInterval() == testInterval) isFound=true;
+    }
+    BOOST_REQUIRE(isFound); // some node of locus 0 must equal testInterval exactly
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMultiOverlapMerge4 )
+{
+    // test merge of overlapping loci, reproduces production failure
+    // both intervals of locus2 lie inside locus1's first interval (1,10,60)
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,30);
+
+    SVLocus locus2;
+    locusAddPair(locus2,1,40,50,1,20,30);
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+
+    const SVLocusSet& cset1(set1);
+    cset1.checkState(true,true); // here the state check is run through the const reference
+
+
+    GenomeInterval testInterval(1,10,60);
+
+    BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+    BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+
+    bool isFound(false);
+    for (const SVLocusNode& node : cset1.getLocus(0))
+    {
+        if (node.getInterval() == testInterval) isFound=true;
+    }
+    BOOST_REQUIRE(isFound); // some node of locus 0 must equal testInterval exactly
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNoiseMerge )
+{ // Merges the same three loci under three different minMergeEdgeObservations settings.
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,30);
+
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,60,2,20,30); // identical to locus1
+
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,60,3,20,30); // shares only the first interval with locus1/locus2
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 1;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u); // threshold 1: everything ends up in one locus
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),3u);
+    }
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u); // threshold 2: two loci remain (presumably locus3 stays separate)
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+    }
+
+    {
+        SVLocusSetOptions sopt; // default options: minMergeEdgeObservations is left untouched
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),3u); // defaults: all three loci stay separate
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+    }
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNoiseClean )
+{ // Checks the locus count before and after clean() and cleanRegion() with threshold 2.
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,30);
+
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,60,2,20,30); // identical to locus1
+
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,60,3,20,30); // shares only the first interval with locus1/locus2
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+
+        set1.clean(); // whole-set clean: one of the two loci is expected to disappear
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+    }
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(1).size(),2u);
+
+        set1.cleanRegion(GenomeInterval(3,0,70)); // region (3,0,70): the assertions below expect no change
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(1).size(),2u);
+
+        set1.cleanRegion(GenomeInterval(1,0,70)); // region (1,0,70) covers the shared first interval
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+    }
+
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNoiseCleanOrder )
+{ // Six loci share the first interval (1,10,60); only the pairs 1/2 and 4/5 are exact duplicates.
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,30);
+
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,60,2,20,30);
+
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,60,3,20,30);
+
+    SVLocus locus4;
+    locusAddPair(locus4,1,10,60,4,20,30);
+
+    SVLocus locus5;
+    locusAddPair(locus5,1,10,60,4,20,30);
+
+    SVLocus locus6;
+    locusAddPair(locus6,1,10,60,5,20,30);
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        set1.merge(locus4);
+        set1.merge(locus5);
+        set1.merge(locus6);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),3u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(1).size(),2u);
+
+        set1.cleanRegion(GenomeInterval(1,0,70)); // region covers the shared first interval
+        // expected afterwards: a single three-node locus (presumably the two duplicated edges joined)
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),3u);
+    }
+
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNoiseCleanRemote )
+{ // A single locus merged once under threshold 2 must be gone after cleaning a region covering both nodes.
+    SVLocus locus1;
+    locusAddPair(locus1,1,100,110,1,10,20);
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+
+        set1.cleanRegion(GenomeInterval(1,0,120)); // (1,0,120) contains both (1,100,110) and (1,10,20)
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusEvidenceRange )
+{ // Merging two loci with the same node intervals must combine the evidence ranges set on their first nodes.
+    SVLocus locus1;
+    {
+        NodeIndexType node1 = locus1.addNode(GenomeInterval(1,100,110));
+        NodeIndexType node2 = locus1.addNode(GenomeInterval(2,100,110));
+        locus1.linkNodes(node1,node2);
+        locus1.setNodeEvidence(node1,known_pos_range2(50,60));
+    }
+
+    SVLocus locus2;
+    {
+        NodeIndexType node1 = locus2.addNode(GenomeInterval(1,100,110));
+        NodeIndexType node2 = locus2.addNode(GenomeInterval(2,100,110));
+        locus2.linkNodes(node1,node2);
+        locus2.setNodeEvidence(node1,known_pos_range2(30,40));
+    }
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        const SVLocusSet& cset1(set1);
+        // expected range (30,60) spans both input ranges (50,60) and (30,40)
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).getEvidenceRange(),known_pos_range2(30,60));
+    }
+
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNoiseOverlap )
+{
+    // adapted from a production failure case:
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,30);
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,60,2,20,30);
+    SVLocus locus3;
+    locusAddPair(locus3,1,59,70,3,20,30); // first interval overlaps (1,10,60) only at its end
+    SVLocus locus4;
+    locusAddPair(locus4,1,65,70,3,20,30); // first interval lies inside locus3's (1,59,70)
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        set1.merge(locus4);
+        const SVLocusSet& cset1(set1);
+        // no BOOST assertions here: the test presumably relies on finalize()/checkState() to flag a bad state
+        set1.finalize();
+        cset1.checkState(true,true);
+    }
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusSingleSelfEdge )
+{ // A locus whose two intervals overlap must end up as a one-node locus and survive clean().
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,1,20,70); // (1,10,60) and (1,20,70) overlap each other
+    locus1.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 1;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),1u);
+
+        set1.clean(); // the same counts are asserted again after cleaning
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),1u);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusDoubleSelfEdge )
+{ // Two self-overlapping loci over the same pair of intervals must merge into a single node.
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,1,20,70);
+    SVLocus locus2;
+    locusAddPair(locus2,1,20,70,1,10,60, true); // same intervals in reverse order; meaning of 'true' is defined by locusAddPair
+
+    locus1.mergeSelfOverlap();
+    locus2.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 1;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        const SVLocusSet& cset1(set1);
+
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).size(),1u); // presumably the node's edge count - TODO confirm
+        // outCount() is presumably the observation total on the node's outgoing edges - TODO confirm
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).outCount(),2u);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusDoubleSelfEdge2 )
+{ // Merges an ordinary two-node locus with a locus whose two intervals are both (1,10,60).
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,70);
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,60,1,10,60, true); // both intervals identical; meaning of 'true' is defined by locusAddPair
+
+    locus1.mergeSelfOverlap();
+    locus2.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 1;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        const SVLocusSet& cset1(set1);
+
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).size(),2u); // presumably the node's edge count - TODO confirm
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).outCount(),2u);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNodeOverlapEdge )
+{
+    // test merge of two edges: one edge node encompasses both nodes of the second edge:
+    // with threshold 2, both merge orders are expected to leave no non-empty locus after finalize()
+
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,70);
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,20,1,30,40); // both intervals lie inside locus1's (1,10,60)
+
+    locus1.mergeSelfOverlap();
+    locus2.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+    {
+        // reverse the order of locus addition to be sure:
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus2);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNodeOverlapSelfEdge )
+{
+    // test merge of two edges: one edge node overlaps a self-edge
+    // the last scenario merges a second copy of the self-edge locus, after which one locus survives finalize()
+
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,60,2,20,70);
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,20,1,10,20,true); // both intervals identical and inside locus1's (1,10,60)
+
+    locus1.mergeSelfOverlap();
+    locus2.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+    {
+        // reverse the order of locus addition to be sure:
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus2);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,20,1,10,20,true); // same arguments as locus2
+
+    locus3.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),2u);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMergeToSelfEdge )
+{
+    // test merge of edges which are not self-edges themselves, but merge to
+    // a state where they should be a self edge.
+    // locus2's first interval (1,15,30) overlaps both intervals of locus1
+
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,25,40);
+    SVLocus locus2;
+    locusAddPair(locus2,1,15,30,1,35,40);
+
+    locus1.mergeSelfOverlap();
+    locus2.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        const SVLocusSet& cset1(set1);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u); // one locus must survive finalize()
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMergeToSelfEdge2 )
+{
+    // Situation: Large signal region spans connected region pair.
+    //
+    // Criteria: Large region gains self-edge only if connected regions are signal
+    // (locus1/locus2 are the duplicated spanned pair; locus3/locus4 are the duplicated spanning edge)
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,30,40);
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,20,1,30,40);
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,40,2,10,20);
+    SVLocus locus4;
+    locusAddPair(locus4,1,10,40,2,10,20);
+
+    {
+        // test non-signal spanned pair (locus2 is deliberately left out)
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus3);
+        set1.merge(locus4);
+        const SVLocusSet& cset1(set1);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(1).size(),2u); // note: the surviving locus is queried at index 1 here
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(1).getNode(0).size(),1u);
+    }
+
+    {
+        // test signal spanned pair (all four loci merged)
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        set1.merge(locus4);
+        const SVLocusSet& cset1(set1);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).size(),2u); // one more than the non-signal case (presumably the added self-edge)
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusMergeToSelfEdge3 )
+{
+    // Situation: Large signal region with self edge spans connected region.
+    //
+    // Criteria: self edge count increases by one
+    // (both merge orders below must give the same result)
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,40,1,10,40,true);
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,40,1,10,40,true);
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,20,1,30,40); // both intervals lie inside (1,10,40)
+
+    locus1.mergeSelfOverlap();
+    locus2.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).size(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).outCount(),3u); // presumably 2 from locus1/locus2 plus 1 from locus3
+    }
+
+    {
+        // run again with locus3 first
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus3);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        const SVLocusSet& cset1(set1);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).size(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).outCount(),3u);
+    }
+
+}
+
+#if 0 // compiled out; note it constructs SVLocusSet(2) directly instead of going through SVLocusSetOptions
+BOOST_AUTO_TEST_CASE( test_SVLocusMergeToSelfEdge4 )
+{
+    // Situation: Large signal region spans 2 connected region pairs.
+    //
+    // Criteria: Large region gains self-edge only if the connected region pairs total to a self-edge signal
+    // NOTE(review): the loci and assertions below match test_SVLocusMergeToSelfEdge2 (one spanned pair, not two)
+
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,30,40);
+    SVLocus locus2;
+    locusAddPair(locus2,1,10,20,1,30,40);
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,40,2,10,20);
+    SVLocus locus4;
+    locusAddPair(locus4,1,10,40,2,10,20);
+
+    {
+        // test non-signal spanned pair
+        SVLocusSet set1(2);
+        set1.merge(locus1);
+        set1.merge(locus3);
+        set1.merge(locus4);
+        const SVLocusSet& cset1(set1);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(1).size(),2u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(1).getNode(0).size(),1u);
+    }
+
+    {
+        // test signal spanned pair
+        SVLocusSet set1(2);
+        set1.merge(locus1);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        set1.merge(locus4);
+        const SVLocusSet& cset1(set1);
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).size(),2u);
+        BOOST_REQUIRE_EQUAL(cset1.getLocus(0).getNode(0).size(),2u);
+    }
+}
+#endif
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusSmallRegionClean )
+{
+    // each scenario merges the same single locus once under threshold 2, then cleans one region:
+    // challenge the region cleaner with regions which (1) span both nodes of a pair (2) span only the second node (3) span only the first node
+    //
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,30,40);
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        set1.cleanRegion(GenomeInterval(1,0,70)); // (1) covers both (1,10,20) and (1,30,40)
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        set1.cleanRegion(GenomeInterval(1,25,70)); // (2) covers only (1,30,40); the locus is expected to remain
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+    }
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        set1.cleanRegion(GenomeInterval(1,5,25)); // (3) covers only (1,10,20); the locus is expected to be removed
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+}
+
+BOOST_AUTO_TEST_CASE( test_SVLocusSmallDelRegionClean )
+{
+    // regions picked up from deletions have counts on both sides
+    // (presumably what the trailing 'true' passed to locusAddPair requests - TODO confirm)
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,30,40,true);
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        set1.cleanRegion(GenomeInterval(1,0,70)); // region covers both nodes
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        set1.clean(); // whole-set clean must give the same result as the region clean above
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusCleanSelfEdge )
+{ // A lone locus with two identical intervals, merged under threshold 3, must be removed by finalize().
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,10,20,true,0); // meaning of the trailing (true,0) is defined by locusAddPair
+
+    locus1.mergeSelfOverlap();
+
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 3;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u); // present before finalize()
+
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),0u); // gone afterwards
+    }
+}
+
+#if 0 // compiled out; note it constructs SVLocusSet(3) directly instead of going through SVLocusSetOptions
+BOOST_AUTO_TEST_CASE( test_SVLocusTransitiveOverlap )
+{
+    // abstracted from real-data error case on MANTA-28
+    //
+    // what happens when there's a complex transitive overlap chain?
+    // (the final argument of each locusAddPair call is presumably an observation count - TODO confirm)
+    SVLocus locus1;
+    locusAddPair(locus1,1,25,32,1,25,32,true,3);
+    SVLocus locus2a;
+    locusAddPair(locus2a,1,8,12,1,14,27,true,1);
+    SVLocus locus2;
+    locusAddPair(locus2,1,11,14,1,18,22,true,1);
+    SVLocus locus3;
+    locusAddPair(locus3,1,11,16,1,18,20,true,2);
+
+    locus1.mergeSelfOverlap();
+    locus2a.mergeSelfOverlap();
+    locus2.mergeSelfOverlap();
+    locus3.mergeSelfOverlap();
+    {
+        SVLocusSet set1(3);
+        set1.merge(locus1);
+        set1.merge(locus2a);
+        set1.merge(locus2);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u);
+    }
+}
+#endif
+
+BOOST_AUTO_TEST_CASE( test_SVLocusTransitiveOverlap2 )
+{
+    // abstracted from real-data error case on MANTA-28
+    //
+    // what happens when there's a complex transitive overlap chain?
+#if 0
+    // original coordinates extracted from actual failure case:
+    SVLocus locus1;
+    locusAddPair(locus1,1,615,837,1,853,900,true,6);
+    SVLocus locus2a;
+    locusAddPair(locus2a,1,464,614,1,712,862,true,1);
+    SVLocus locus2b;
+    locusAddPair(locus2b,1,645,798,1,421,574,false,1);
+    SVLocus locus2c;
+    locusAddPair(locus2c,1,370,851,1,370,851,true,3);
+    SVLocus locus3;
+    locusAddPair(locus3,1,693,843,1,538,688,false,1);
+#endif
+    // simplified coordinates actually used by the test (the block above is compiled out):
+    SVLocus locus1;
+    locusAddPair(locus1,1,30,40,1,50,60,true,6);
+    SVLocus locus2a;
+    locusAddPair(locus2a,1,10,20,1,30,60,true,1);
+    SVLocus locus2b;
+    locusAddPair(locus2b,1,30,40,1,10,20,false,1);
+    SVLocus locus2c;
+    locusAddPair(locus2c,1,10,40,1,10,40,true,3);
+    SVLocus locus3;
+    locusAddPair(locus3,1,30,40,1,10,20,false,1);
+
+    locus1.mergeSelfOverlap();
+    locus2a.mergeSelfOverlap();
+    locus2b.mergeSelfOverlap();
+    locus2c.mergeSelfOverlap();
+    locus3.mergeSelfOverlap();
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 6;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2c); // note: locus2c is merged before locus2a and locus2b
+        set1.merge(locus2a);
+        set1.merge(locus2b);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u); // a single locus both before...
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u); // ...and after finalize()
+    }
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusTransitiveOverlap3 )
+{
+    // abstracted from real-data error case on MANTA-28
+    //
+    // what happens when there's a complex transitive overlap chain?
+    // (the final argument of each locusAddPair call is presumably an observation count - TODO confirm)
+    SVLocus locus1;
+    locusAddPair(locus1,1,40,60,1,70,80,true,2);
+    SVLocus locus2a;
+    locusAddPair(locus2a,1,10,40,1,50,60,true,1);
+    SVLocus locus3;
+    locusAddPair(locus3,1,10,20,1,30,60,false,1);
+
+    locus1.mergeSelfOverlap();
+    locus2a.mergeSelfOverlap();
+    locus3.mergeSelfOverlap();
+    {
+        SVLocusSetOptions sopt;
+        sopt.minMergeEdgeObservations = 2;
+        SVLocusSet set1(sopt);
+        set1.merge(locus1);
+        set1.merge(locus2a);
+        set1.merge(locus3);
+        const SVLocusSet& cset1(set1);
+
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u); // a single locus both before...
+        set1.finalize();
+        cset1.checkState(true,true);
+        BOOST_REQUIRE_EQUAL(cset1.nonEmptySize(),1u); // ...and after finalize()
+    }
+}
+
+
+// replicate at least one part of MANTA-257 in minimal form:
+BOOST_AUTO_TEST_CASE( test_SVLocusSet_MANTA257_min1 )
+{
+    SVLocus locus1;
+    {
+        const NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(0,10,20));
+        const NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,60,80));
+        const NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(1,20,50));
+
+        locus1.linkNodes(nodePtr1,nodePtr2);
+        locus1.linkNodes(nodePtr1,nodePtr3);
+    }
+
+    SVLocus locus2;
+    {
+        const NodeIndexType nodePtr1 = locus2.addNode(GenomeInterval(1,10,30));
+        const NodeIndexType nodePtr2 = locus2.addNode(GenomeInterval(0,10,20));
+        const NodeIndexType nodePtr3 = locus2.addNode(GenomeInterval(1,40,70));
+
+        locus2.linkNodes(nodePtr1,nodePtr2);
+        locus2.linkNodes(nodePtr3,nodePtr1); // note the argument order: nodePtr3 first, nodePtr1 second
+    }
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    const SVLocusSet& cset1(set1);
+    // no BOOST assertions here: the test presumably relies on finalize()/checkState() to flag a bad state
+    set1.finalize();
+    cset1.checkState(true,true);
+}
+
+
+// replicate the MANTA-257 bug in slightly reduced form:
+BOOST_AUTO_TEST_CASE( test_SVLocusSet_MANTA257_simplified )
+{
+    // linkNodes is called with two extra arguments (1,0) here; presumably per-direction edge counts - TODO confirm
+    SVLocus locus1;
+    {
+        const NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(0,10,40));
+        const NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,60,100));
+        const NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(1,20,50));
+
+        locus1.linkNodes(nodePtr1,nodePtr2,1,0);
+        locus1.linkNodes(nodePtr1,nodePtr3,1,0);
+    }
+
+    SVLocus locus2;
+    {
+        const NodeIndexType nodePtr1 = locus2.addNode(GenomeInterval(1,10,30));
+        const NodeIndexType nodePtr2 = locus2.addNode(GenomeInterval(0,20,30));
+        const NodeIndexType nodePtr3 = locus2.addNode(GenomeInterval(1,80,90));
+        const NodeIndexType nodePtr4 = locus2.addNode(GenomeInterval(1,40,70));
+
+        locus2.linkNodes(nodePtr1,nodePtr2,1,0);
+        locus2.linkNodes(nodePtr1,nodePtr3,1,0);
+        locus2.linkNodes(nodePtr4,nodePtr1,1,0); // note the argument order: nodePtr4 first, nodePtr1 second
+    }
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 1;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    const SVLocusSet& cset1(set1);
+    // no BOOST assertions here: the test presumably relies on finalize()/checkState() to flag a bad state
+    set1.finalize();
+    cset1.checkState(true,true);
+}
+
+
+// replicate the MANTA-257 bug in minimally reduced form:
+BOOST_AUTO_TEST_CASE( test_SVLocusSet_MANTA257 )
+{
+    // some intervals below start at negative positions (-298, -309)
+    SVLocus locus1;
+    {
+        const NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(0,2255650,2256356));
+        const NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,776,1618));
+        const NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(1,-298,488));
+
+        locus1.linkNodes(nodePtr1,nodePtr2,51,0); // last two arguments are presumably per-direction edge counts - TODO confirm
+        locus1.linkNodes(nodePtr1,nodePtr3,78,0);
+    }
+
+    SVLocus locus2;
+    {
+        const NodeIndexType nodePtr1 = locus2.addNode(GenomeInterval(1,-309,265));
+        const NodeIndexType nodePtr2 = locus2.addNode(GenomeInterval(0,2255700,2256245));
+        const NodeIndexType nodePtr3 = locus2.addNode(GenomeInterval(1,1018,1595));
+        const NodeIndexType nodePtr4 = locus2.addNode(GenomeInterval(1,412,904));
+
+        locus2.linkNodes(nodePtr1,nodePtr2,21,0);
+        locus2.linkNodes(nodePtr1,nodePtr3,9,3); // the only link in this test with a non-zero fourth argument
+        locus2.linkNodes(nodePtr4,nodePtr1,12,0);
+    }
+
+    SVLocusSetOptions sopt;
+    sopt.minMergeEdgeObservations = 9;
+    SVLocusSet set1(sopt);
+    set1.merge(locus1);
+    set1.merge(locus2);
+    const SVLocusSet& cset1(set1);
+    // no BOOST assertions here: the test presumably relies on finalize()/checkState() to flag a bad state
+    set1.finalize();
+    cset1.checkState(true,true);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/svgraph/test/SVLocusTest.cpp b/src/c++/lib/svgraph/test/SVLocusTest.cpp
new file mode 100644
index 0000000..8bc479f
--- /dev/null
+++ b/src/c++/lib/svgraph/test/SVLocusTest.cpp
@@ -0,0 +1,169 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#include "boost/test/unit_test.hpp"
+
+#include "svgraph/SVLocus.hh"
+
+#include "SVLocusTestUtil.hh"
+
+
+BOOST_AUTO_TEST_SUITE( test_SVLocus )
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocus1 )
+{
+
+    // construct a simple two-node locus
+    SVLocus locus1;
+    locusAddPair(locus1,1,10,20,1,30,40);
+
+    BOOST_REQUIRE_EQUAL(locus1.size(),2u);
+
+    for (const SVLocusNode& node : static_cast<const SVLocus&>(locus1))
+    {
+        BOOST_REQUIRE_EQUAL(node.size(),1u);
+    }
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNodeMerge2)
+{
+    SVLocus locus1;
+    NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,15,25));
+    NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(3,10,20));
+    NodeIndexType nodePtr4 = locus1.addNode(GenomeInterval(4,10,20));
+    locus1.linkNodes(nodePtr1,nodePtr3);
+    locus1.linkNodes(nodePtr2,nodePtr4);
+
+    //locus1.mergeNode(nodePtr2, nodePtr1, NULL);
+    locus1.mergeSelfOverlap();
+
+    const SVLocusNode& node1(static_cast<const SVLocus&>(locus1).getNode(nodePtr1));
+
+    BOOST_REQUIRE_EQUAL(node1.outCount(),2u);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.begin_pos(),10);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.end_pos(),25);
+    BOOST_REQUIRE_EQUAL(node1.size(),2u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNodeMergeSelfEdge)
+{
+    SVLocus locus1;
+    NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,15,25));
+    locus1.linkNodes(nodePtr1,nodePtr2);
+    locus1.mergeSelfOverlap();
+
+    const SVLocusNode& node1(static_cast<const SVLocus&>(locus1).getNode(0));
+
+    BOOST_REQUIRE_EQUAL(node1.outCount(),1u);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.begin_pos(),10);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.end_pos(),25);
+    BOOST_REQUIRE_EQUAL(node1.size(),1u);
+
+    // test that the single edge of the merged node is to self:
+    BOOST_REQUIRE_EQUAL(node1.getEdgeManager().getMap().begin()->first,0u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNodeMergeSelfEdgeReverse)
+{
+    SVLocus locus1;
+    NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,15,25));
+    locus1.linkNodes(nodePtr2,nodePtr1);
+    locus1.mergeSelfOverlap();
+
+    const SVLocusNode& node1(static_cast<const SVLocus&>(locus1).getNode(0));
+
+    BOOST_REQUIRE_EQUAL(node1.outCount(),1u);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.begin_pos(),10);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.end_pos(),25);
+    BOOST_REQUIRE_EQUAL(node1.size(),1u);
+
+    // test that the single edge of the merged node is to self:
+    BOOST_REQUIRE_EQUAL(node1.getEdgeManager().getMap().begin()->first,0u);
+}
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusNodeMergeMultiSelfEdge )
+{
+    SVLocus locus1;
+    NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    NodeIndexType nodePtr1copy = locus1.addNode(GenomeInterval(1,10,20));
+    NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,15,25));
+    locus1.linkNodes(nodePtr1,nodePtr1copy,1,1);
+    locus1.linkNodes(nodePtr1,nodePtr2);
+
+    locus1.mergeSelfOverlap();
+
+    const SVLocusNode& node1(static_cast<const SVLocus&>(locus1).getNode(0));
+
+    BOOST_REQUIRE_EQUAL(node1.outCount(),2u);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.begin_pos(),10);
+    BOOST_REQUIRE_EQUAL(node1.getInterval().range.end_pos(),25);
+    BOOST_REQUIRE_EQUAL(node1.size(),1u);
+
+    // test that the single edge of the merged node is to self:
+    BOOST_REQUIRE_EQUAL(node1.getEdgeManager().getMap().begin()->first,0u);
+}
+
+
+
+BOOST_AUTO_TEST_CASE( test_SVLocusClearEdges )
+{
+    // construct a diamond four-node locus
+    //
+    //  1
+    // 2 3
+    //  4
+    //
+    SVLocus locus1;
+    NodeIndexType nodePtr1 = locus1.addNode(GenomeInterval(1,10,20));
+    NodeIndexType nodePtr2 = locus1.addNode(GenomeInterval(1,30,40));
+    NodeIndexType nodePtr3 = locus1.addNode(GenomeInterval(1,50,60));
+    NodeIndexType nodePtr4 = locus1.addNode(GenomeInterval(1,70,80));
+    locus1.linkNodes(nodePtr1,nodePtr2);
+    locus1.linkNodes(nodePtr1,nodePtr3);
+    locus1.linkNodes(nodePtr2,nodePtr4);
+    locus1.linkNodes(nodePtr3,nodePtr4);
+
+    // now disconnect 1 from 2,3:
+    locus1.clearNodeEdges(nodePtr1);
+
+    const SVLocus& clocus1(locus1);
+    BOOST_REQUIRE_EQUAL(clocus1.size(),4u);
+
+    BOOST_REQUIRE_EQUAL(clocus1.getNode(nodePtr1).size(),0u);
+    BOOST_REQUIRE_EQUAL(clocus1.getNode(nodePtr2).size(),1u);
+    BOOST_REQUIRE_EQUAL(clocus1.getNode(nodePtr3).size(),1u);
+    BOOST_REQUIRE_EQUAL(clocus1.getNode(nodePtr4).size(),2u);
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/c++/lib/svgraph/test/SVLocusTestUtil.hh b/src/c++/lib/svgraph/test/SVLocusTestUtil.hh
new file mode 100644
index 0000000..c19d20c
--- /dev/null
+++ b/src/c++/lib/svgraph/test/SVLocusTestUtil.hh
@@ -0,0 +1,48 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+///
+/// \author Chris Saunders
+///
+
+#pragma once
+
+#include "svgraph/SVLocus.hh"
+
+/// test helper: add nodes (tid1,beginPos1,endPos1) and (tid2,beginPos2,endPos2) to locus, then link the first to the second
+inline
+void
+locusAddPair(
+    SVLocus& locus,
+    const int32_t tid1,
+    const int32_t beginPos1,
+    const int32_t endPos1,
+    const int32_t tid2,
+    const int32_t beginPos2,
+    const int32_t endPos2,
+    const bool bothLocal = false, // if true, count is also passed as the 4th linkNodes argument (otherwise 0 is passed)
+    const int count = 1)          // passed as the 3rd linkNodes argument
+{
+    const NodeIndexType nodePtr1 = locus.addNode(GenomeInterval(tid1,beginPos1,endPos1));
+    const NodeIndexType nodePtr2 = locus.addNode(GenomeInterval(tid2,beginPos2,endPos2));
+    const int remoteCount(bothLocal ? count : 0);
+    locus.linkNodes(nodePtr1,nodePtr2,count,remoteCount);
+}
+
diff --git a/src/c++/lib/svgraph/test/test_main.cpp b/src/c++/lib/svgraph/test/test_main.cpp
new file mode 100644
index 0000000..1f05f22
--- /dev/null
+++ b/src/c++/lib/svgraph/test/test_main.cpp
@@ -0,0 +1,23 @@
+// -*- mode: c++; indent-tabs-mode: nil; -*-
+//
+// Manta - Structural Variant and Indel Caller
+// Copyright (c) 2013-2016 Illumina, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//
+
+#define BOOST_TEST_MODULE libsvgraph
+#include "boost/test/unit_test.hpp"
+
diff --git a/src/cmake/boost.cmake b/src/cmake/boost.cmake
new file mode 100644
index 0000000..1b11200
--- /dev/null
+++ b/src/cmake/boost.cmake
@@ -0,0 +1,256 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for boost installation
+##
+## author Come Raczy
+##
+################################################################################
+
+# set to TRUE to see more detailed information about the boost find/build procedure:
+set (DEBUG_FINDBOOST FALSE)
+if (${DEBUG_FINDBOOST})
+    set (Boost_DEBUG "ON")
+    set (Boost_DETAILED_FAILURE_MSG "ON")
+endif ()
+
+macro (initBoostParams)
+    # required boost libraries
+    set (THIS_BOOST_VERSION 1.56.0)
+    # note we default to alphabetical order here, except where boost libraries depend on
+    # each other (timer->chrono)
+    set (THIS_BOOST_COMPONENTS date_time filesystem program_options
+                                regex serialization system timer chrono unit_test_framework)
+
+    # the name given to boost.build and the library name are the same for all libraries, except
+    # for test, so we need two lists now:
+    set (THIS_BOOST_BUILD_COMPONENTS date_time filesystem program_options
+                                     regex serialization system timer chrono test)
+    set (Boost_USE_STATIC_LIBS ON)
+    if (NOT WIN32)
+        # bjam on windows ignores this setting so skip for win32:
+        set (Boost_USE_MULTITHREADED OFF)
+    endif ()
+endmacro()
+
+# simple helper for resetFindBoost
+macro(unsetall name)
+    unset (${name} CACHE)
+    unset (${name})
+endmacro()
+
+
+function(makedir path)
+    if(NOT EXISTS "${path}")
+        file(MAKE_DIRECTORY "${path}")
+    endif()
+endfunction()
+
+
+macro (resetFindBoost)
+
+    set(BOOST_RESET_SYMBOLS FOUND INCLUDE_DIRS INCLUDE_DIR LIBRARIES LIBRARY_DIRS VERSION LIB_VERSION MAJOR_VERSION MINOR_VERSION SUBMINOR_VERSION USE_STATIC_LIBS USE_MULTITHREADED)
+
+    foreach (TAG ${BOOST_RESET_SYMBOLS})
+        unsetall (Boost_${TAG})
+    endforeach()
+
+    unset (ENV{BOOST_LIBRARYDIR})
+
+    foreach (COMPONENT ${THIS_BOOST_COMPONENTS})
+        STRING(TOUPPER ${COMPONENT} UPPERCOMPONENT)
+        unsetall (Boost_${UPPERCOMPONENT}_FOUND)
+        unsetall (Boost_${UPPERCOMPONENT}_LIBRARY)
+        unsetall (Boost_${UPPERCOMPONENT}_LIBRARY_RELEASE)
+        unsetall (Boost_${UPPERCOMPONENT}_LIBRARY_DEBUG)
+    endforeach ()
+
+    initBoostParams()
+endmacro ()
+
+
+initBoostParams()
+
+if (THIS_FORCE_STATIC_LINK)
+    set(Boost_USE_STATIC_LIBS ON)
+endif ()
+
+set(BOOST_BOOTSTRAP_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/bootstrap/boost)
+if (EXISTS "${BOOST_BOOTSTRAP_INSTALL_DIR}/boost_install_complete")
+    set (BOOST_ROOT "${BOOST_BOOTSTRAP_INSTALL_DIR}")
+endif ()
+
+# newer cmake versions (at least cmake 3.5) will issue tons of warning text for
+# each component query if no boost version is found at all, for this reason we
+# break up the boost version search into two steps:
+#
+# (1) can you find boost >= min_version at all?
+# (2) if so, does that boost version include all of our required components?
+#
+find_package(Boost ${THIS_BOOST_VERSION})
+if (Boost_FOUND)
+    find_package(Boost ${THIS_BOOST_VERSION} COMPONENTS ${THIS_BOOST_COMPONENTS} QUIET)
+endif ()
+
+# CMAKE_PARALLEL is only used if boost is not found (i.e. when boost is built from the distribution below), but moving the setting here (outside of the if below) suppresses a cmake warning:
+if (NOT CMAKE_PARALLEL)
+    set (CMAKE_PARALLEL "1")
+endif ()
+
+#
+# If the right version of boost is not found, it will be built from the distribution
+#
+if (NOT Boost_FOUND)
+    foreach(COMPONENT ${THIS_BOOST_COMPONENTS})
+        STRING(TOUPPER ${COMPONENT} UPPERCOMPONENT)
+        if (${Boost_${UPPERCOMPONENT}_FOUND})
+            set(FOUND_STATUS "found")
+        else()
+            set(FOUND_STATUS "NOT FOUND")
+        endif()
+        message(STATUS "Boost component: ${COMPONENT}\tstatus: ${FOUND_STATUS}")
+    endforeach()
+
+    if (BOOST_ROOT)
+        message (STATUS "BOOST_ROOT is set to ${BOOST_ROOT} but boost ${THIS_BOOST_VERSION} or one of its components was not found.")
+        message (FATAL_ERROR "Unset BOOST_ROOT or set it to the root location of boost ${THIS_BOOST_VERSION}.")
+    endif()
+
+    # Try to find it in target installation location
+    resetFindBoost()
+    message(STATUS "Boost version ${THIS_BOOST_VERSION} or higher not found. Boost will be built from the distribution...")
+
+    set(ENV{THIS_BOOST_BUILD_COMPONENTS} "${THIS_BOOST_BUILD_COMPONENTS}")
+    set(ENV{THIS_BOOST_VERSION} "${THIS_BOOST_VERSION}")
+
+    set(THIS_BOOTSTRAP_DIR "${THIS_MODULE_DIR}/bootstrap")
+
+    string (REPLACE "." "_" BOOST_FILENAME_VERSION "${THIS_BOOST_VERSION}")
+    set (BOOST_SOURCE_PREFIX "boost_${BOOST_FILENAME_VERSION}")
+    set (BOOST_INSTALL_DIR "${BOOST_BOOTSTRAP_INSTALL_DIR}")
+    set (BOOST_BUILD_DIR "${BOOST_INSTALL_DIR}/build")
+    set (BOOST_SRC_DIR "${BOOST_BUILD_DIR}/${BOOST_SOURCE_PREFIX}")
+    makedir("${BOOST_BUILD_DIR}")
+
+    if (NOT EXISTS "${BOOST_BUILD_DIR}/boost_unpack_complete")
+        message(STATUS "Unpacking boost library source")
+        execute_process(
+            COMMAND ${CMAKE_COMMAND} -E tar xjf "${THIS_REDIST_DIR}/${BOOST_SOURCE_PREFIX}.tar.bz2"
+            WORKING_DIRECTORY "${BOOST_BUILD_DIR}"
+            RESULT_VARIABLE TMP_RESULT)
+
+        if (TMP_RESULT)
+            message (FATAL_ERROR "Failed to unpack boost library ${THIS_BOOST_VERSION}")
+        endif ()
+        execute_process(
+            COMMAND ${CMAKE_COMMAND} -E touch "${BOOST_BUILD_DIR}/boost_unpack_complete")
+    endif ()
+
+    set (BOOST_BOOTSTRAP sh "bootstrap.sh")
+    if (WIN32)
+        set (BOOST_BOOTSTRAP "bootstrap.bat")
+    endif ()
+
+    # boost compile works in windows, but we aren't going to link anyway so we're
+    # skipping to save time:
+    #
+    if (NOT WIN32)
+
+        set (BJAM_OPTIONS "")
+
+        set (UCONFIG "${BOOST_SRC_DIR}/tools/build/src/user-config.jam")
+        if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+            file (WRITE "${UCONFIG}" "using gcc : : \"${CMAKE_CXX_COMPILER}\" ;\n")
+        elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+            file (WRITE "${UCONFIG}" "using clang : : \"${CMAKE_CXX_COMPILER}\" ;\n")
+            set (BJAM_OPTIONS ${BJAM_OPTIONS} "define=_GLIBCXX_USE_CXX11_ABI=0")
+            set (BJAM_OPTIONS ${BJAM_OPTIONS} "toolset=clang")
+        endif ()
+
+        message(STATUS "Configuring boost library")
+        execute_process(
+            COMMAND ${BOOST_BOOTSTRAP}
+            WORKING_DIRECTORY "${BOOST_SRC_DIR}"
+            RESULT_VARIABLE TMP_RESULT
+            OUTPUT_QUIET)
+
+        if (TMP_RESULT)
+            message (FATAL_ERROR "Failed to configure boost library ${THIS_BOOST_VERSION}")
+        endif ()
+
+        foreach (BOOST_LIBRARY ${THIS_BOOST_BUILD_COMPONENTS})
+            list (APPEND BOOST_BJAM_LIBRARY_SELECTION "--with-${BOOST_LIBRARY}")
+        endforeach ()
+
+        # Include full path for bjam so that we don't depend on cwd in build user's PATH
+        set (BOOST_BJAM "${BOOST_SRC_DIR}/bjam")
+
+        set (BJAM_OPTIONS ${BJAM_OPTIONS} "link=static")
+        if (WIN32)
+            if (MSVC)
+                math (EXPR VS_VERSION "(${MSVC_VERSION}/100) - 6")
+                set(TOOLSET "toolset=msvc-${VS_VERSION}.0")
+            endif ()
+
+            # windows bjam ignores single/static so don't even try:
+            set (BJAM_OPTIONS ${BJAM_OPTIONS} "${TOOLSET}")
+        else ()
+            set (BJAM_OPTIONS ${BJAM_OPTIONS} "threading=single")
+        endif ()
+
+        set (BOOST_BUILD_CMD ${BOOST_BJAM} --prefix=${BOOST_INSTALL_DIR} ${BOOST_BJAM_LIBRARY_SELECTION} -j${CMAKE_PARALLEL} --libdir=${BOOST_INSTALL_DIR}/lib ${BJAM_OPTIONS} install)
+        set (BOOST_BUILD_ERROR_LOG "${BOOST_INSTALL_DIR}/boost.build.error.txt")
+        message(STATUS "Building boost library")
+        execute_process(
+            COMMAND ${BOOST_BUILD_CMD}
+            WORKING_DIRECTORY "${BOOST_SRC_DIR}"
+            RESULT_VARIABLE TMP_RESULT
+            OUTPUT_FILE ${BOOST_BUILD_ERROR_LOG}
+            ERROR_FILE ${BOOST_BUILD_ERROR_LOG})
+
+        if (TMP_RESULT)
+            string (REPLACE ";" " " BOOST_PRETTY_BUILD_CMD "${BOOST_BUILD_CMD}")
+            message (STATUS "Boost build command: 'cd ${BOOST_SRC_DIR} && ${BOOST_PRETTY_BUILD_CMD}'")
+            message (FATAL_ERROR "Failed to build boost library ${THIS_BOOST_VERSION}. See build log: ${BOOST_BUILD_ERROR_LOG}")
+        endif ()
+
+        message (STATUS "Successfuly built boost ${THIS_BOOST_VERSION}")
+        execute_process(
+            COMMAND ${CMAKE_COMMAND} -E touch "${BOOST_INSTALL_DIR}/boost_install_complete"
+            WORKING_DIRECTORY "${BOOST_SRC_DIR}")
+    else ()
+        # for the time being on windows the goal is only to enable dev and library level compilation, not linking, so we don't need the libraries
+        # do a quick copy instead to get the headers in place only:
+        message(STATUS "WIN32 - skipping to header only boost installation for non-linked development")
+        makedir("${BOOST_INSTALL_DIR}/include")
+        makedir("${BOOST_INSTALL_DIR}/lib")
+        file(COPY "${BOOST_SRC_DIR}/boost"
+            DESTINATION "${BOOST_INSTALL_DIR}/include")
+    endif ()
+
+    set (BOOST_ROOT "${BOOST_BOOTSTRAP_INSTALL_DIR}")
+    find_package(Boost ${THIS_BOOST_VERSION} COMPONENTS ${THIS_BOOST_COMPONENTS})
+endif ()
+
+foreach(COMPONENT ${THIS_BOOST_COMPONENTS})
+    STRING(TOUPPER ${COMPONENT} UPPERCOMPONENT)
+    set(HAVE_LIBBOOST_${UPPERCOMPONENT} ${Boost_${UPPERCOMPONENT}_FOUND})
+endforeach()
diff --git a/src/cmake/bootstrap/common.bash b/src/cmake/bootstrap/common.bash
new file mode 100644
index 0000000..1241d53
--- /dev/null
+++ b/src/cmake/bootstrap/common.bash
@@ -0,0 +1,47 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Definition of functions and variables common to all bootstrap scripts.
+##
+## author Come Raczy
+##
+################################################################################
+
+# common log definition for bash installation scripts:
+ilog() {
+	echo -e $@ >&2
+}
+
+# unpack $SOURCE_TARBALL into $BUILD_DIR; exits 1 if the tarball is missing or $SOURCE_DIR does not exist afterwards
+common_create_source () {
+    if [[ ! -e "$SOURCE_TARBALL" ]] ; then
+        ilog "$SCRIPT: source tarball $SOURCE_TARBALL not found"
+        exit 1
+    fi
+    ilog "Decompressing $SOURCE_TARBALL"
+    mkdir -p "${BUILD_DIR}"
+    tar -C "${BUILD_DIR}" "-${TARBALL_COMPRESSION}xf" "$SOURCE_TARBALL"
+
+    if [[ ! -d "$SOURCE_DIR" ]] ; then
+        ilog "$SOURCE_DIR does not exist"
+        exit 1
+    fi
+}
diff --git a/src/cmake/bootstrap/installCmake.bash b/src/cmake/bootstrap/installCmake.bash
new file mode 100644
index 0000000..4bd8199
--- /dev/null
+++ b/src/cmake/bootstrap/installCmake.bash
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Installation script for cmake
+##
+## author Come Raczy
+##
+################################################################################
+
+set -o nounset
+set -o pipefail
+
+REDIST_DIR=$1
+INSTALL_DIR=$2
+if [[ $# -ge 3 ]] ; then PARALLEL=$3 ; else PARALLEL=1 ; fi
+
+script_dir="$(dirname "$0")"
+source $script_dir/common.bash
+
+BUILD_DIR=${INSTALL_DIR}/build
+BIN_DIR=${INSTALL_DIR}/bin
+INCLUDE_DIR=${INSTALL_DIR}/include
+
+CMAKE_MAJOR=2
+CMAKE_MINOR=8
+CMAKE_PATCH=12
+CMAKE_PATCH_MIN=4
+CMAKE_REQUIRED="$CMAKE_MAJOR.$CMAKE_MINOR.$CMAKE_PATCH_MIN"
+TARBALL_VERSION="$CMAKE_MAJOR.$CMAKE_MINOR.$CMAKE_PATCH"
+SCRIPT=`basename "$0"`
+SOURCE_TARBALL=${REDIST_DIR}/cmake-$TARBALL_VERSION.tar.bz2
+TARBALL_COMPRESSION=j
+SOURCE_DIR=${BUILD_DIR}/cmake-$TARBALL_VERSION
+CMAKE_DIR=cmake-$CMAKE_MAJOR.$CMAKE_MINOR
+
+AVAILABLE_CMAKE_VERSION=`cmake --version 2> /dev/null`
+if [[ "${AVAILABLE_CMAKE_VERSION}" =~ ^cmake\ version\ ([0-9]+)\.([0-9]+)\.([0-9]+) ]] ; then
+    MAJOR=${BASH_REMATCH[1]}
+    MINOR=${BASH_REMATCH[2]}
+    PATCH=${BASH_REMATCH[3]}
+    if [[ "$MAJOR" -gt "$CMAKE_MAJOR" || ( "$MAJOR" -eq "$CMAKE_MAJOR" && ( "$MINOR" -gt "$CMAKE_MINOR" || ( "$MINOR" -eq "$CMAKE_MINOR" && "$PATCH" -ge "$CMAKE_PATCH_MIN"  ) ) ) ]] ; then
+        ilog "${BASH_REMATCH[0]} (>= $CMAKE_REQUIRED) is already installed"
+        echo "cmake"
+        exit 0
+    fi
+fi
+
+OLD_CMAKE_VERSION=`${BIN_DIR}/cmake --version 2> /dev/null`;
+if [[ $OLD_CMAKE_VERSION == "cmake version $TARBALL_VERSION" ]] ; then
+    ilog "cmake version \"$TARBALL_VERSION\" is already installed at ${BIN_DIR}/cmake"
+    echo "${BIN_DIR}/cmake"
+    exit 0
+elif [[ $OLD_CMAKE_VERSION != "" ]] ; then
+    ilog "ERROR: unable to install cmake version \"$TARBALL_VERSION\" in ${BIN_DIR}"
+    ilog "\tcmake version \"$OLD_CMAKE_VERSION\" is in the way."
+    ilog "\tPlease use an empty location to build the product."
+    exit 1
+fi
+
+
+##
+## clean up any existing source directory before proceeding
+##
+rm -rf "$SOURCE_DIR"
+
+common_create_source
+
+ilog "Extracted cmake version $TARBALL_VERSION source code into $SOURCE_DIR"
+cmd="cd \"$SOURCE_DIR\" && ./bootstrap --prefix=\"${INSTALL_DIR}\" --parallel=$PARALLEL && make -j $PARALLEL && make install"
+ilog "Installing cmake using: '$cmd'"
+eval "$cmd" 1>&2
+# the check below must directly follow the eval so that $? is the build status
+if [ $? != 0 ] ; then ilog "ERROR: cmake build failed: Terminating..."; exit 1 ; fi
+
+ilog "Cleaning up ${SOURCE_DIR}"
+rm -rf "${SOURCE_DIR}"
+
+ilog "CMake-$TARBALL_VERSION installed successfully"
+echo "${BIN_DIR}/cmake"
diff --git a/src/cmake/buildTimeConfigure.cmake b/src/cmake/buildTimeConfigure.cmake
new file mode 100644
index 0000000..731e205
--- /dev/null
+++ b/src/cmake/buildTimeConfigure.cmake
@@ -0,0 +1,38 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+# \author Chris Saunders
+
+# update file with various build-time properties
+# requires CONFIG_FILE SOURCE_FILE DEST_FILE
+#
+# expected CONFIG_FILE format is:
+# "key1\tvalue1\n"
+# "key2\tvalue2\n" ...
+#
+
+file (READ ${CONFIG_FILE} CONFIG_LINES)
+STRING(REPLACE "\n" ";" CONFIG_LINES "${CONFIG_LINES}")
+foreach (CONFIG_LINE ${CONFIG_LINES})
+    STRING(REPLACE "\t" ";" CONFIG_LIST "${CONFIG_LINE}")
+    list (GET CONFIG_LIST 0 PAIR0)
+    list (GET CONFIG_LIST 1 PAIR1)
+    set(${PAIR0} "${PAIR1}")
+endforeach ()
+configure_file(${SOURCE_FILE} ${DEST_FILE} @ONLY)
diff --git a/src/cmake/cxxCommon.cmake b/src/cmake/cxxCommon.cmake
new file mode 100644
index 0000000..ba7ba37
--- /dev/null
+++ b/src/cmake/cxxCommon.cmake
@@ -0,0 +1,31 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## CMake shared c++ configuration
+##
+################################################################################
+
+include_directories (BEFORE SYSTEM ${THIS_CXX_BEFORE_SYSTEM_INCLUDES})
+include_directories (${THIS_CXX_ALL_INCLUDES})
+include_directories (${CMAKE_CURRENT_BINARY_DIR})
+include_directories (${CMAKE_CURRENT_SOURCE_DIR})
+include_directories (${THIS_CXX_CONFIG_H_DIR})
+
diff --git a/src/cmake/cxxConfigure.cmake b/src/cmake/cxxConfigure.cmake
new file mode 100644
index 0000000..62cf8b5
--- /dev/null
+++ b/src/cmake/cxxConfigure.cmake
@@ -0,0 +1,511 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## CMake configuration file for c++ executables
+##
+## author Come Raczy
+##
+################################################################################
+
+
+INCLUDE(CheckFunctionExists)
+
+find_path(HAVE_INTTYPES_H  inttypes.h)
+find_path(HAVE_MEMORY_H    memory.h)
+find_path(HAVE_STDINT_H    stdint.h)
+find_path(HAVE_STDLIB_H    stdlib.h)
+find_path(HAVE_STRING_H    string.h)
+find_path(HAVE_STRINGS_H   strings.h)
+find_path(HAVE_UNISTD_H    unistd.h)
+
+set (CMAKE_REQUIRED_LIBRARIES m)
+check_function_exists(floorf HAVE_FLOORF)
+check_function_exists(round  HAVE_ROUND)
+check_function_exists(roundf HAVE_ROUNDF)
+check_function_exists(powf HAVE_POWF)
+
+include ("${THIS_MACROS_CMAKE}")
+
+# Support for static linking
+# Note that this implies that all libraries must be found with the
+# exact file name (libXXX.a or libXXX.so)
+#if    (THIS_FORCE_STATIC_LINK)
+#    message(STATUS "All libraries will be statically linked")
+#    set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "-static")
+#    set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "-static")
+    # ensure that even if cmake decides to allow for dynamic libs resolution,
+    # this gets overridden into static...
+#    set(CMAKE_EXE_LINK_DYNAMIC_CXX_FLAGS ${CMAKE_EXE_LINK_STATIC_CXX_FLAGS})
+#    set(THIS_LIBRARY_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
+#    set(THIS_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
+    # set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++")
+#else  ()
+#    set(THIS_LIBRARY_PREFIX "")
+#    set(THIS_LIBRARY_SUFFIX "")
+#endif ()
+
+# point to local opt/ copy of zlib:
+include_directories(${ZLIB_DIR})
+set  (THIS_ADDITIONAL_LIB ${THIS_ADDITIONAL_LIB} ${ZLIB_LIBRARY})
+
+# required support for librt to allow boost chrono
+if (UNIX AND NOT APPLE)
+    set  (THIS_ADDITIONAL_LIB ${THIS_ADDITIONAL_LIB} rt)
+endif ()
+
+if (WIN32)
+    set  (THIS_ADDITIONAL_LIB ${THIS_ADDITIONAL_LIB} ws2_32)
+endif ()
+
+
+# htslib 1.x forces pthreads in link:
+find_package( Threads )
+set  (THIS_ADDITIONAL_LIB ${THIS_ADDITIONAL_LIB} ${CMAKE_THREAD_LIBS_INIT})
+
+# setup ccache if found in path
+if (NOT WIN32)
+    find_program(CCACHE_PATH ccache)
+    set (IS_CCACHE TRUE)
+    if (CCACHE_PATH STREQUAL "CCACHE_PATH-NOTFOUND")
+        set (IS_CCACHE FALSE)
+    endif()
+endif ()
+
+set (IS_CLANG ((CMAKE_C_COMPILER_ID STREQUAL "Clang") OR (CMAKE_C_COMPILER_ID STREQUAL "AppleClang")))
+set (IS_CLANGXX ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")))
+
+
+if (${IS_CCACHE})
+    message (STATUS "Found ccache: ${CCACHE_PATH}")
+    SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
+    SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
+
+    # special logic to get clang and ccache working together (suggestion from http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html):
+    set(ENV{CCACHE_CPP2} "yes")
+    if (${IS_CLANGXX})
+        append_args (CMAKE_CXX_FLAGS "-Qunused-arguments")
+    endif()
+    if (${IS_CLANG})
+        append_args (CMAKE_C_FLAGS "-Qunused-arguments")
+    endif()
+
+else()
+    message (STATUS "No ccache found")
+endif()
+
+
+# Force static linking
+set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
+
+function(get_compiler_name_version compiler_name compiler_version)
+    execute_process(COMMAND ${compiler_name} -dumpversion OUTPUT_VARIABLE this_version)
+    STRING(REGEX REPLACE "(\r?\n)+$" "" this_version "${this_version}")
+    set(${compiler_version} ${this_version} PARENT_SCOPE)
+endfunction()
+
+function(get_compiler_version compiler_version)
+    get_compiler_name_version(${CMAKE_CXX_COMPILER} this_version)
+    set(${compiler_version} ${this_version} PARENT_SCOPE)
+endfunction()
+
+# clang doesn't make finding the version easy for us,
+# and apple makes it even harder...
+macro(get_clang_version compiler_version)
+#    execute_process(COMMAND bash -c "${CMAKE_CXX_COMPILER} -v 2>&1 | awk '{printf $3; exit}'" OUTPUT_VARIABLE ${compiler_version})
+    execute_process(COMMAND bash -c "echo | ${CMAKE_CXX_COMPILER} -dM -E - | awk '/__clang_version__/ {printf $3; exit}' | tr -d '\"'" OUTPUT_VARIABLE ${compiler_version})
+    if (APPLE)
+        # translate apple clang version numbers back to root llvm value (better way to do this?)
+        if (${compiler_version} VERSION_LESS "3.1")
+            set (${compiler_version} "0.0")
+        elseif (${compiler_version} VERSION_LESS "4.2")
+            set (${compiler_version} "3.1")
+        elseif (${compiler_version} VERSION_LESS "5.0")
+            set (${compiler_version} "3.2")
+        elseif (${compiler_version} VERSION_LESS "5.1")
+            set (${compiler_version} "3.3")
+        elseif (${compiler_version} VERSION_LESS "6.0")
+            set (${compiler_version} "3.4")
+        elseif (${compiler_version} VERSION_LESS "6.1")
+            set (${compiler_version} "3.5")
+        else ()
+            set (${compiler_version} "3.6")
+        endif ()
+    endif ()
+endmacro()
+
+macro(test_min_compiler compiler_version min_compiler_version compiler_label)
+    if (${compiler_version} VERSION_LESS ${min_compiler_version})
+        message (FATAL_ERROR "Unsupported version for ${compiler_label}: ${compiler_version}: "
+                             "only versions >= ${min_compiler_version} are supported")
+    endif ()
+endmacro()
+
+
+set (min_gxx_version "4.8")
+set (min_clang_version "3.2")
+set (min_intel_version "15.0")
+set (min_msvc_version "1800") # cl.exe 18, as shipped in Visual Studio 12 2013
+
+set (CXX_COMPILER_NAME "${CMAKE_CXX_COMPILER_ID}")
+set (COMPILER_VERSION "UNKNOWN")
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    get_compiler_version(COMPILER_VERSION)
+    set (CXX_COMPILER_NAME "g++")
+    test_min_compiler(${COMPILER_VERSION} "${min_gxx_version}" "${CXX_COMPILER_NAME}")
+elseif (${IS_CLANGXX})
+    get_clang_version(COMPILER_VERSION)
+    set (CXX_COMPILER_NAME "clang++")
+    test_min_compiler(${COMPILER_VERSION} "${min_clang_version}" "${CXX_COMPILER_NAME}")
+elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+    get_compiler_version(COMPILER_VERSION)
+    set (CXX_COMPILER_NAME "icpc")
+    test_min_compiler(${COMPILER_VERSION} "${min_intel_version}" "${CXX_COMPILER_NAME}")
+elseif (MSVC)
+    set (COMPILER_VERSION ${MSVC_VERSION})
+    set (CXX_COMPILER_NAME "msvc")
+    test_min_compiler(${COMPILER_VERSION} "${min_msvc_version}" "${CXX_COMPILER_NAME}")
+endif ()
+
+message (STATUS "Using compiler: ${CXX_COMPILER_NAME} version ${COMPILER_VERSION}")
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+    # for intel we also need to test the minimum version of g++ currently
+    # in the path (because this is the stdc++ library that intel will use):
+    get_compiler_name_version("g++" gxx_compiler_version)
+    test_min_compiler(${gxx_compiler_version} "${min_gxx_version}" "g++ libstdc++ (library used by icpc)")
+    message (STATUS "Using libstdc++: gnu version ${gxx_compiler_version}")
+endif ()
+
+
+#
+# set compile flags
+#
+
+
+##
+## set static linking of standard libraries for binary redistribution:
+##
+set (IS_STANDARD_STATIC FALSE)
+if     (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "4.5"))
+        set (IS_STANDARD_STATIC TRUE)
+    endif ()
+elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+    set (IS_STANDARD_STATIC TRUE)
+endif ()
+
+if (${IS_STANDARD_STATIC})
+    append_args (CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++")
+endif ()
+
+
+##
+## set bug workarounds:
+##
+
+# determine version of libstdc++ library (compiler ids are case-sensitive: icpc reports "Intel", and uses the g++ found in the path)
+if     (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+    set(STDCXX_VERSION ${gxx_compiler_version})
+elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    set(STDCXX_VERSION ${COMPILER_VERSION})
+else ()
+    set(STDCXX_VERSION FALSE)
+endif ()
+
+if     (STDCXX_VERSION)
+    if (((${STDCXX_VERSION} VERSION_EQUAL "4.7") OR (${STDCXX_VERSION} VERSION_EQUAL "4.7.3")) OR
+        ((${STDCXX_VERSION} VERSION_EQUAL "4.8") OR (${STDCXX_VERSION} VERSION_EQUAL "4.8.2")))
+        # workaround for: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800
+        add_definitions( -DBROKEN_NTH_ELEMENT )
+    endif ()
+endif ()
+
+#
+# not exactly a bug, but give clang a fighting chance on gcc5+ systems
+#
+if (${IS_CLANGXX})
+    # TODO only checked on linux clang, test this for AppleClang
+    # TODO assuming this will not be needed in clang 3.9+, check when 3.9 comes out.
+    add_definitions( -D_GLIBCXX_USE_CXX11_ABI=0 )
+endif ()
+
+
+##
+## set warning flags:
+##
+set (GNU_COMPAT_COMPILER ( (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR (${IS_CLANGXX}) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")))
+if (${GNU_COMPAT_COMPILER})
+    append_args(CXX_WARN_FLAGS "-Wall -Wextra -Wshadow -Wunused -Wpointer-arith -Winit-self -pedantic -Wunused-parameter")
+    append_args(CXX_WARN_FLAGS "-Wundef -Wno-unknown-pragmas")
+    append_args(CXX_WARN_FLAGS "-Wdeprecated")
+
+    if ((NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") OR (NOT ${COMPILER_VERSION} VERSION_LESS "14.0"))
+        append_args(CXX_WARN_FLAGS "-Wdisabled-optimization")
+        append_args(CXX_WARN_FLAGS "-Wno-missing-braces")
+    endif ()
+
+    if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+        append_args(CXX_WARN_FLAGS "-Wempty-body")
+        append_args(CXX_WARN_FLAGS "-Wredundant-decls")
+    endif ()
+
+    if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") # variable name left unexpanded so an empty build type cannot break the condition
+        append_args(CXX_WARN_FLAGS "-Wuninitialized")
+    endif ()
+elseif (MSVC)
+    append_args(CXX_WARN_FLAGS "/W3 /wd4305 /wd4244 /wd4068")
+    # suppress warnings for size_t to {unsigned,int, etc...} narrowing (most occur in 64 bit build):
+    append_args(CXX_WARN_FLAGS "/wd4267")
+
+    # suppress warning of symbol names greater than N (...where N=4096 for VSC++15)
+    append_args(CXX_WARN_FLAGS "/wd4503")
+endif ()
+
+if     (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "4.2"))
+        append_args(CXX_WARN_FLAGS "-Wlogical-op")
+    endif ()
+
+    if ((${COMPILER_VERSION} VERSION_LESS "4.8") AND (NOT (${COMPILER_VERSION} VERSION_LESS "4.7")))
+        # switching off warning about unused function because otherwise compilation will fail with g++ 4.7.3 in Ubuntu,
+        # don't know which patch levels are affected, so marking out all gcc 4.7.X
+        append_args(CXX_WARN_FLAGS "-Wno-unused-function")
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "5.1"))
+        # these mostly only make sense with flto:
+        append_args(CXX_WARN_FLAGS "-Wodr")
+        #append_args(CXX_WARN_FLAGS "-Wsuggest-final-types -Wsuggest-final-methods")
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "6.1"))
+        append_args(CXX_WARN_FLAGS "-Wshift-negative-value -Wshift-overflow=2 -Wduplicated-cond")
+        #append_args(CXX_WARN_FLAGS "-Wnull-dereference")
+    endif ()
+
+elseif (${IS_CLANGXX})
+    # set to true to uncover new clang warnings after llvm update:
+    set (IS_WARN_EVERYTHING FALSE)
+
+    if (${IS_WARN_EVERYTHING})
+        append_args(CXX_WARN_FLAGS "-Weverything")
+    endif ()
+
+    append_args(CXX_WARN_FLAGS "-Wmissing-prototypes -Wunused-exception-parameter -Wbool-conversion")
+    append_args(CXX_WARN_FLAGS "-Wsizeof-array-argument -Wstring-conversion")
+    append_args(CXX_WARN_FLAGS "-Wheader-hygiene -Wmismatched-tags")
+
+    if (${IS_WARN_EVERYTHING})
+        append_args(CXX_WARN_FLAGS "-Wno-sign-conversion -Wno-weak-vtables -Wno-conversion -Wno-cast-align -Wno-padded")
+        append_args(CXX_WARN_FLAGS "-Wno-switch-enum -Wno-missing-noreturn -Wno-covered-switch-default")
+        append_args(CXX_WARN_FLAGS "-Wno-unreachable-code -Wno-global-constructors -Wno-exit-time-destructors")
+        append_args(CXX_WARN_FLAGS "-Wno-c++98-compat -Wno-old-style-cast -Wno-unused-member-function")
+        append_args(CXX_WARN_FLAGS "-Wno-documentation -Wno-float-equal")
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "3.2"))
+        append_args(CXX_WARN_FLAGS "-Wimplicit-fallthrough -Wloop-analysis -Wextra-semi")
+        append_args(CXX_WARN_FLAGS "-Wmissing-variable-declarations -Wunused-private-field")
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "3.3"))
+        append_args(CXX_WARN_FLAGS "-Woverloaded-shift-op-parentheses")
+
+        if (${IS_WARN_EVERYTHING})
+            append_args(CXX_WARN_FLAGS "-Wno-documentation-unknown-command")
+        endif ()
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "3.4"))
+        append_args(CXX_WARN_FLAGS "-Wheader-guard -Wlogical-not-parentheses")
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "3.6"))
+        append_args(CXX_WARN_FLAGS "-Wunreachable-code-return -Wkeyword-macro -Winconsistent-missing-override")
+
+        if (${IS_WARN_EVERYTHING})
+            append_args(CXX_WARN_FLAGS "-Wno-reserved-id-macro")
+        endif ()
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "3.7"))
+        if (${IS_WARN_EVERYTHING})
+            append_args(CXX_WARN_FLAGS "-Wno-c++98-compat-pedantic")
+        endif ()
+    endif ()
+
+    if (NOT (${COMPILER_VERSION} VERSION_LESS "3.8"))
+        append_args(CXX_WARN_FLAGS "-Wnon-virtual-dtor")
+
+        if (${IS_WARN_EVERYTHING})
+            append_args(CXX_WARN_FLAGS "-Wno-double-promotion")
+        endif ()
+    endif ()
+
+elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+    # suppress errors in boost headers:
+    append_args(CXX_WARN_FLAGS "-diag-disable 177,193,869,1599,3280")
+
+    append_args(CXX_WARN_FLAGS "-Wunused-variable -Wpointer-arith")
+
+    #append_args(CXX_WARN_FLAGS "-Wmissing-prototypes -Wmissing-declarations -Wunused-variable -Wpointer-arith -Wuninitialized")
+endif()
+
+append_args (CMAKE_CXX_FLAGS "${CXX_WARN_FLAGS}")
+
+#
+# other customizations
+#
+if (${GNU_COMPAT_COMPILER})
+    if ((NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") OR (${COMPILER_VERSION} VERSION_LESS "15.0"))
+        append_args (CMAKE_CXX_FLAGS "-std=c++0x")
+    else ()
+        append_args (CMAKE_CXX_FLAGS "-std=c++11")
+    endif ()
+    set (CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
+
+    # The NDEBUG macro is intentionally removed from release. One discussion on this is:
+    # http://www.drdobbs.com/an-exception-or-a-bug/184401686
+    set (CMAKE_CXX_FLAGS_RELEASE "-O3 -fomit-frame-pointer")
+    set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g")
+    set (CMAKE_CXX_FLAGS_ASAN "-O1 -g -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls")
+    #set (CMAKE_CXX_FLAGS_PROFILE "-O0 -g -pg -fprofile-arcs -ftest-coverage")
+
+    # this doesn't seem to impact performance, taking out for now:
+    #if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    #    set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -flto")
+    #endif ()
+endif()
+
+if (MSVC)
+    add_definitions(/D_CRT_SECURE_NO_WARNINGS)
+
+    # allow us to use standard c++ logical keywords
+    append_args (CMAKE_CXX_FLAGS "/FI\"ciso646\"")
+endif()
+
+
+# if ASan build type is requested, check that the compiler supports it:
+if (CMAKE_BUILD_TYPE STREQUAL "ASan")
+    set (IS_ASAN_SUPPORTED false)
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+        if (NOT (${COMPILER_VERSION} VERSION_LESS "4.8"))
+            set (IS_ASAN_SUPPORTED true)
+        endif ()
+    elseif (${IS_CLANGXX})
+        if (NOT (${COMPILER_VERSION} VERSION_LESS "3.1"))
+            set (IS_ASAN_SUPPORTED true)
+        endif ()
+    endif ()
+
+    if (NOT ${IS_ASAN_SUPPORTED})
+        message(FATAL_ERROR "Address sanitizer build type requested, but this is not supported by compiler.")
+    endif ()
+endif ()
+
+#
+# take advantage of analyze on VS
+#
+if (MSVC)
+    if (IS_MSVC_ANALYZE)
+        append_args (CMAKE_CXX_FLAGS "/analyze")
+    endif ()
+endif ()
+
+
+if (${GNU_COMPAT_COMPILER})
+
+  if (${DEVELOPER_MODE})
+    # some compiler versions will produce warnings with no reasonable workaround,
+    # turn Werror off in this case
+    #
+    # a very common example are warnings from boost generated despite this library
+    # being identified as a system header
+    #
+    set(IS_WERROR true)
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+        if (${COMPILER_VERSION} VERSION_LESS "4.2")
+            set(IS_WERROR false)
+        endif ()
+    endif ()
+
+    if(${IS_WERROR})
+        message (STATUS "Building in developer mode: treating compiler warnings as errors")
+        append_args (CMAKE_CXX_FLAGS "-Werror")
+    endif ()
+  endif ()
+
+  if (CMAKE_SYSTEM_PROCESSOR MATCHES "^i[67]86$")
+    ##
+    ## Use scalar floating point instructions from the SSE instruction set.
+    ## Note: Pentium3 SSE supports only single precision arithmetic
+    ##
+    append_args(CMAKE_CXX_FLAGS "-msse -mfpmath=sse")
+  elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^i[345]86$")
+    if (NOT ${IS_CLANGXX})
+      ##
+      ## Prevent using 80bit registers (more consistent rounding)
+      ##
+      append_args (CMAKE_CXX_FLAGS "-ffloat-store")
+    endif ()
+  endif ()
+
+endif()
+
+# cmake configure-time c++ configuration:
+#
+# don't include common subdirectory in config include path so that
+# we effectively namespace this config.h to reduce include
+# filename shadowing
+#
+set (CXX_CONFIG_BASENAME "common/config.h")
+set (THIS_CXX_CONFIG_IN_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib)
+set (THIS_CXX_CONFIG_H_DIR ${CMAKE_CURRENT_BINARY_DIR}/lib)
+set (CONFIG_DEST_FILE ${THIS_CXX_CONFIG_H_DIR}/${CXX_CONFIG_BASENAME})
+configure_file(${THIS_CXX_CONFIG_IN_DIR}/${CXX_CONFIG_BASENAME}.in ${CONFIG_DEST_FILE} @ONLY)
+
+# build-time c++ configuration:
+# note: (csaunders) tried to do this as add_custom_command every which way, can't get cmake to figure out
+#       dependency chain in this case
+set (CXX_BUILDTIME_CONFIG_BASENAME "common/configBuildTimeInfo.h")
+set (CXX_BUILDTIME_CONFIG_SOURCE_FILE ${THIS_CXX_CONFIG_IN_DIR}/${CXX_BUILDTIME_CONFIG_BASENAME}.in)
+set (CXX_BUILDTIME_CONFIG_DEST_FILE ${THIS_CXX_CONFIG_H_DIR}/${CXX_BUILDTIME_CONFIG_BASENAME})
+set (CXX_BUILDTIME_CONFIG_TARGET "${THIS_PROJECT_NAME}_cxx_buildtime_config")
+add_custom_target(${CXX_BUILDTIME_CONFIG_TARGET}
+    DEPENDS ${THIS_BUILDTIME_CONFIG_TARGET}
+    COMMAND ${CMAKE_COMMAND}
+    -D CONFIG_FILE=${THIS_BUILDTIME_CONFIG_FILE}
+    -D SOURCE_FILE=${CXX_BUILDTIME_CONFIG_SOURCE_FILE}
+    -D DEST_FILE=${CXX_BUILDTIME_CONFIG_DEST_FILE}
+    -P ${THIS_MODULE_DIR}/buildTimeConfigure.cmake)
+
+# special config hack for windows
+if (WIN32)
+    set (UNSTD_DEST_FILE ${THIS_CXX_CONFIG_H_DIR}/unistd.h)
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lib/blt_util/compat_unistd.h ${UNSTD_DEST_FILE} COPYONLY)
+endif ()
+
+#
+# include dirs:
+#
+set (THIS_CXX_BEFORE_SYSTEM_INCLUDES "${Boost_INCLUDE_DIRS}" "${HTSLIB_DIR}")
+set (THIS_CXX_ALL_INCLUDES "${THIS_SOURCE_DIR}/c++/lib")
diff --git a/src/cmake/cxxExecutable.cmake b/src/cmake/cxxExecutable.cmake
new file mode 100644
index 0000000..02d4a31
--- /dev/null
+++ b/src/cmake/cxxExecutable.cmake
@@ -0,0 +1,31 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## CMake configuration file for all the c++ executables
+##
+## author Roman Petrovski
+##
+################################################################################
+
+include (${THIS_CXX_COMMMON_CMAKE})
+
+get_filename_component(CURRENT_DIR_NAME ${CMAKE_CURRENT_SOURCE_DIR} NAME)
+message (STATUS "Adding c++ program subdirectory: ${CURRENT_DIR_NAME}")
diff --git a/src/cmake/cxxLibrary.cmake b/src/cmake/cxxLibrary.cmake
new file mode 100644
index 0000000..5d90cfb
--- /dev/null
+++ b/src/cmake/cxxLibrary.cmake
@@ -0,0 +1,76 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## CMake configuration file for all the c++ libraries
+##
+## author Come Raczy
+##
+################################################################################
+
+include (${THIS_CXX_COMMMON_CMAKE})
+
+get_filename_component(CURRENT_DIR_NAME ${CMAKE_CURRENT_SOURCE_DIR} NAME)
+message (STATUS "Adding c++ library subdirectory: ${CURRENT_DIR_NAME}")
+
+##
+## Some generators (VS) require all targets to be unique across the project.
+## Therefore, a unique prefix is needed to create the target names which are
+## shared across libraries
+##
+
+string(REGEX REPLACE ${THIS_SOURCE_DIR}/c[+][+]/ "" TMP1 ${CMAKE_CURRENT_SOURCE_DIR}/)
+string(REGEX REPLACE "/" "_" THIS_UNIQUE_PREFIX ${TMP1})
+
+##
+## build the library
+##
+
+file(GLOB THIS_LIBRARY_SOURCES *.cpp *.c)
+foreach (SOURCE_FILE ${THIS_LIBRARY_SOURCES})
+    get_filename_component(SOURCE_NAME ${SOURCE_FILE} NAME_WE)
+    if (${SOURCE_NAME}_COMPILE_FLAGS)
+        set_source_files_properties(${SOURCE_FILE} PROPERTIES COMPILE_FLAGS ${${SOURCE_NAME}_COMPILE_FLAGS})
+    endif ()
+endforeach ()
+
+# we don't need to add headers to the library for the build to work, but adding headers
+# results in better IDE support from cmake, without this step any "unpaired" header file
+# appears to be outside of the project for any IDE relying on cmake's project definition:
+file(GLOB THIS_LIBRARY_HEADERS *.hh)
+set (THIS_LIBRARY_SOURCES ${THIS_LIBRARY_SOURCES} ${THIS_LIBRARY_HEADERS})
+
+if (THIS_LIBRARY_SOURCES)
+    set (LIB_TARGET_NAME "${THIS_PROJECT_NAME}_${CURRENT_DIR_NAME}")
+    add_library     (${LIB_TARGET_NAME} STATIC ${THIS_LIBRARY_SOURCES})
+    add_dependencies(${LIB_TARGET_NAME} ${THIS_OPT})
+
+    # make the target project use folders when applying cmake IDE generators like Visual Studio
+    file(RELATIVE_PATH THIS_RELATIVE_LIBDIR "${THIS_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}")
+    set_property(TARGET ${LIB_TARGET_NAME} PROPERTY FOLDER "${THIS_RELATIVE_LIBDIR}")
+endif()
+
+##
+## build the unit tests if a "test" subdirectory is found:
+##
+if (IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/test")
+    message (STATUS "Adding c++ test subdirectory:    ${CURRENT_DIR_NAME}/test")
+    add_subdirectory (test)
+endif ()
diff --git a/src/cmake/cxxTestLibrary.cmake b/src/cmake/cxxTestLibrary.cmake
new file mode 100644
index 0000000..0f84ff9
--- /dev/null
+++ b/src/cmake/cxxTestLibrary.cmake
@@ -0,0 +1,72 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the unit tests subdirectory
+##
+## author Ole Schulz-Trieglaff
+##
+################################################################################
+
+include (${THIS_CXX_COMMMON_CMAKE})
+
+set(TESTCONFIGNAME "test_config.h")
+set(TESTCONFIGSRC "${CMAKE_CURRENT_SOURCE_DIR}/${TESTCONFIGNAME}.in")
+set(TESTCONFIGDEST "${CMAKE_CURRENT_BINARY_DIR}/${TESTCONFIGNAME}")
+
+if (EXISTS "${TESTCONFIGSRC}")
+    configure_file("${TESTCONFIGSRC}" "${TESTCONFIGDEST}" @ONLY)
+    include_directories("${CMAKE_CURRENT_BINARY_DIR}")
+endif ()
+
+set(TEST_TARGET_NAME "${THIS_PROJECT_NAME}_unit_test_${THIS_LIB_DIR}")
+
+if (THIS_LIBRARY_SOURCES)
+    set(ADDITIONAL_UNITTEST_LIB ${ADDITIONAL_UNITTEST_LIB} ${THIS_PROJECT_NAME}_${THIS_LIB_DIR})
+endif ()
+
+if (WIN32)
+    # create a fake library target on win32 instead of linking and running the unit test
+    # this creates a project in VS that allows for interaction with the unit test code
+
+    # add all files to TEST_SOURCE to make the IDE project more usable:
+    file(GLOB TMP_TARGET_FILES *)
+
+    add_library     (${TEST_TARGET_NAME} STATIC ${TMP_TARGET_FILES})
+    #set_source_files_properties(thefile PROPERTIES HEADER_FILE_ONLY TRUE)
+    add_dependencies(${TEST_TARGET_NAME} ${THIS_OPT})
+else ()
+    file(GLOB TEST_SOURCE *.cpp)
+    add_executable(${TEST_TARGET_NAME} ${TEST_SOURCE})
+    add_dependencies(${TEST_TARGET_NAME} ${THIS_OPT})
+
+    target_link_libraries (${TEST_TARGET_NAME} ${ADDITIONAL_UNITTEST_LIB} ${THIS_AVAILABLE_LIBRARIES}
+                           ${HTSLIB_LIBRARY} ${Boost_LIBRARIES} ${THIS_ADDITIONAL_LIB})
+
+    set(TEST_BINARY ${CMAKE_CURRENT_BINARY_DIR}/${TEST_TARGET_NAME})
+
+    add_test(${TEST_TARGET_NAME} ${TEST_BINARY} "--log_level=test_suite")
+endif ()
+
+# make the target project use folders when applying cmake IDE generators like Visual Studio
+file(RELATIVE_PATH THIS_RELATIVE_LIBDIR "${THIS_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}")
+set_property(TARGET ${TEST_TARGET_NAME} PROPERTY FOLDER "${THIS_RELATIVE_LIBDIR}")
+
+add_dependencies(${THIS_UNITTESTS} ${TEST_TARGET_NAME})
diff --git a/src/cmake/getBuildTimeConfigInfo.cmake b/src/cmake/getBuildTimeConfigInfo.cmake
new file mode 100644
index 0000000..4af6a45
--- /dev/null
+++ b/src/cmake/getBuildTimeConfigInfo.cmake
@@ -0,0 +1,65 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+# \author Chris Saunders
+
+#
+# get various build-time configuration values -- this is information not available
+# at cmake configuration time
+#
+# requires SRC_DIR, REDIST_DIR and CONFIG_FILE
+#
+
+# generate git describe tag
+
+set (GETGIT_CMAKE "${REDIST_DIR}/cmake-modules-c99fd3/GetGitRevisionDescription.cmake")
+include ("${GETGIT_CMAKE}")
+git_describe(GIT_VERSION "${SRC_DIR}" --match "v[0-9]*" --dirty)
+if (NOT GIT_VERSION)
+    # try again without the --dirty flag, might be an older git:
+    git_describe(GIT_VERSION "${SRC_DIR}" --match "v[0-9]*")
+endif()
+
+if (NOT GIT_VERSION)
+    set(GIT_VERSION "UNKNOWN")
+else ()
+    STRING(REGEX REPLACE "^v" "" GIT_VERSION ${GIT_VERSION})
+endif ()
+set(WORKFLOW_VERSION ${GIT_VERSION})
+message(STATUS "Detected workflow version: ${WORKFLOW_VERSION}")
+file(WRITE ${CONFIG_FILE} "WORKFLOW_VERSION\t${WORKFLOW_VERSION}\n")
+
+#
+# get build timestamp
+#
+# python is a cross platform way to do this without newer cmake,
+# we have compile and runtime python req anyway.
+#
+find_package(PythonInterp QUIET)
+if (PYTHONINTERP_FOUND)
+    execute_process(
+        COMMAND ${PYTHON_EXECUTABLE} -c "import datetime;print(datetime.datetime.utcnow().isoformat())"
+        OUTPUT_VARIABLE BUILD_TIME
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    set (BUILD_TIME "${BUILD_TIME}Z")
+else ()
+    set (BUILD_TIME "UNKNOWN")
+endif ()
+file(APPEND ${CONFIG_FILE} "BUILD_TIME\t${BUILD_TIME}\n")
diff --git a/src/cmake/globals.cmake b/src/cmake/globals.cmake
new file mode 100644
index 0000000..e5fab52
--- /dev/null
+++ b/src/cmake/globals.cmake
@@ -0,0 +1,47 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## CMake configuration file to identify the configuration of the system
+##
+## author Roman Petrovski
+##
+################################################################################
+
+set(THIS_EXECUTABLE_PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE)
+set(THIS_LIBRARY_PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ)
+
+install(CODE "
+
+    # With package generator, the location where files are placed is not the location where they will be run.
+    # _FULL_ variables are guaranteed valid only at runtime
+    set (CPACK_GENERATOR \"${CPACK_GENERATOR}\")
+    set (FULL_PREFIX \"\")
+    if (NOT CPACK_GENERATOR)
+        set (FULL_PREFIX \"$ENV{DESTDIR}\")
+    endif()
+
+    get_filename_component(THIS_FULL_DATADIR        \"\${FULL_PREFIX}${THIS_DATADIR}\" ABSOLUTE)
+    get_filename_component(THIS_FULL_BINDIR         \"\${FULL_PREFIX}${THIS_BINDIR}\" ABSOLUTE)
+    get_filename_component(THIS_FULL_LIBDIR         \"\${FULL_PREFIX}${THIS_LIBDIR}\" ABSOLUTE)
+    get_filename_component(THIS_FULL_LIBEXECDIR     \"\${FULL_PREFIX}${THIS_LIBEXECDIR}\" ABSOLUTE)
+    get_filename_component(THIS_FULL_PYTHON_LIBDIR  \"\${FULL_PREFIX}${THIS_PYTHON_LIBDIR}\" ABSOLUTE)
+    ")
+
diff --git a/src/cmake/macros.cmake b/src/cmake/macros.cmake
new file mode 100644
index 0000000..d4e80b8
--- /dev/null
+++ b/src/cmake/macros.cmake
@@ -0,0 +1,139 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## CMake configuration file for common installation macros
+##
+## authors: Roman Petrovski, Mauricio Varea
+##
+################################################################################
+
+macro(configure_files srcDir destDir pattern)
+    file(GLOB templateFiles RELATIVE ${srcDir} ${srcDir}/${pattern})
+    foreach(templateFile ${templateFiles})
+        #message(STATUS "Configuring file ${srcDir}/${templateFile}")
+        configure_file(${srcDir}/${templateFile} ${destDir}/${templateFile} @ONLY)
+    endforeach()
+endmacro()
+
+
+macro(install_fileglob srcDir destDir pattern perm)
+    file(GLOB templateFiles ${srcDir}/${pattern})
+    install(FILES ${templateFiles} DESTINATION ${destDir} PERMISSIONS ${perm})
+endmacro()
+
+
+#
+# Macro to find libraries, with support for static-only search
+#
+macro(static_find_library name header library)
+    if    (NOT ${name}_INCLUDE_DIR)
+        find_path(${name}_INCLUDE_DIR ${header}
+                  HINTS ENV C_INCLUDE_PATH ENV CPATH ENV CPLUS_INCLUDE_PATH)
+    endif ()
+    if    (${name}_INCLUDE_DIR AND NOT ${name}_LIBRARY)
+        find_library(${name}_LIBRARY
+                     NAMES "${LIBRARY_PREFIX}${library}${LIBRARY_SUFFIX}"
+                     HINTS ENV LIBRARY_PATH)
+    endif ()
+    if(${name}_INCLUDE_DIR AND ${name}_LIBRARY)
+        set (HAVE_${name} ${${name}_LIBRARY})
+        message (STATUS "Found ${name}  header: ${${name}_INCLUDE_DIR}/${header}")
+        message (STATUS "Found ${name} library: ${${name}_LIBRARY}")
+    endif()
+endmacro()
+
+
+
+#
+# get all sub directories: (refaim at stackoverflow)
+#
+function(subdirlist result curdir)
+    file(GLOB children RELATIVE ${curdir} ${curdir}/*)
+    set(dirlist "")
+    foreach(child ${children})
+        if(IS_DIRECTORY ${curdir}/${child})
+            set(dirlist ${dirlist} ${child})
+        endif()
+    endforeach()
+    set(${result} ${dirlist} PARENT_SCOPE)
+endfunction()
+
+
+
+#
+# standard join(list) -> string function
+#
+# usage:
+# join (C_FLAG_LIST " " C_FLAG_STRING)
+#
+function(join list sep output)
+    set(tmp "")
+    foreach(val ${${list}})
+        if (tmp STREQUAL "")
+            set(tmp ${val})
+        else()
+            set(tmp "${tmp}${sep}${val}")
+        endif()
+    endforeach()
+    set(${output} ${tmp} PARENT_SCOPE)
+endfunction()
+
+
+# usage:
+# append(C_WARN_FLAGS " -Wall -Wextra")
+#
+function(append output)
+    set(tmp ${${output}})
+    foreach(val ${ARGN})
+        set(tmp "${tmp}${val}")
+    endforeach()
+    set(${output} ${tmp} PARENT_SCOPE)
+endfunction()
+
+# Appends strings with spaces added automatically:
+#
+# usage:
+# append_args(C_WARN_FLAGS "-Wall" "-Wextra")
+#
+function(append_args output)
+    set(sep " ")
+    set(tmp ${${output}})
+    foreach(val ${ARGN})
+        if ("${tmp}" STREQUAL "")
+            set(tmp "${val}")
+        else()
+            set(tmp "${tmp}${sep}${val}")
+        endif()
+    endforeach()
+    set(${output} ${tmp} PARENT_SCOPE)
+endfunction()
+
+
+include("${THIS_GLOBALS_CMAKE}") # get THIS_*_PERMISSIONS
+
+#
+# handle installation of a directory of python library code
+# TODO: generate py->pyc here as well
+#
+function(install_python_lib_dir fromdir todir)
+    install (DIRECTORY "${fromdir}/" DESTINATION "${todir}" FILE_PERMISSIONS ${THIS_LIBRARY_PERMISSIONS} FILES_MATCHING PATTERN "*.py")
+    install (DIRECTORY "${fromdir}/" DESTINATION "${todir}" FILE_PERMISSIONS ${THIS_EXECUTABLE_PERMISSIONS} FILES_MATCHING PATTERN "*.pyc")
+endfunction()
diff --git a/src/cmake/postInstall/CMakeLists.txt b/src/cmake/postInstall/CMakeLists.txt
new file mode 100644
index 0000000..76c638e
--- /dev/null
+++ b/src/cmake/postInstall/CMakeLists.txt
@@ -0,0 +1,26 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## author Roman Petrovski
+##
+################################################################################
+
+# placeholder for future post-install actions
diff --git a/src/cmake/preInstall/CMakeLists.txt b/src/cmake/preInstall/CMakeLists.txt
new file mode 100644
index 0000000..c95ea49
--- /dev/null
+++ b/src/cmake/preInstall/CMakeLists.txt
@@ -0,0 +1,29 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## Configuration file for the preInstall subdirectory
+##
+## author Roman Petrovski
+##
+################################################################################
+
+add_subdirectory (checkTargetPathsWritable)
+add_subdirectory (copyrightAndChanges)
diff --git a/src/cmake/preInstall/checkTargetPathsWritable/CMakeLists.txt b/src/cmake/preInstall/checkTargetPathsWritable/CMakeLists.txt
new file mode 100644
index 0000000..e42688e
--- /dev/null
+++ b/src/cmake/preInstall/checkTargetPathsWritable/CMakeLists.txt
@@ -0,0 +1,35 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## author Roman Petrovski
+##
+################################################################################
+
+
+message (STATUS "Verifying target directories access")
+
+include ("${THIS_GLOBALS_CMAKE}")
+install(
+    CODE "set(THIS_TEST_DIRS \"\${THIS_FULL_DATADIR}\"
+                               \"\${THIS_FULL_BINDIR}\" \"\${THIS_FULL_LIBDIR}\"
+                               \"\${THIS_FULL_LIBEXECDIR}\")"
+    SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/checkTargetPathWritable.cmake"
+)
diff --git a/src/cmake/preInstall/checkTargetPathsWritable/checkTargetPathWritable.cmake b/src/cmake/preInstall/checkTargetPathsWritable/checkTargetPathWritable.cmake
new file mode 100644
index 0000000..9fee275
--- /dev/null
+++ b/src/cmake/preInstall/checkTargetPathsWritable/checkTargetPathWritable.cmake
@@ -0,0 +1,47 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## author Roman Petrovski
+##
+################################################################################
+
+
+foreach (THIS_DIR ${THIS_TEST_DIRS})
+    message (STATUS "Testing access to ${THIS_DIR}...")
+    set (TEST_DIR "${THIS_DIR}/test")
+    execute_process(
+        COMMAND ${CMAKE_COMMAND} -E make_directory "${TEST_DIR}"
+        RESULT_VARIABLE TMP_RESULT )
+    execute_process(
+        COMMAND ${CMAKE_COMMAND} -E remove_directory "${TEST_DIR}")
+
+    if (TMP_RESULT)
+        message (STATUS "ERROR: Directory is not writeable: ${THIS_DIR}")
+        message (STATUS "If you don't have administrator access to the "
+                         "target installation location, please use --prefix "
+                         "command-line option during configuration. "
+                         "Please see 'configure --help' for all installer "
+                         "command-line options.")
+        message (FATAL_ERROR "ERROR: installation cannot continue")
+    else ()
+        message (STATUS "Directory is writeable: ${THIS_DIR}")
+    endif ()
+endforeach ()
diff --git a/src/cmake/preInstall/copyrightAndChanges/CMakeLists.txt b/src/cmake/preInstall/copyrightAndChanges/CMakeLists.txt
new file mode 100644
index 0000000..611abe4
--- /dev/null
+++ b/src/cmake/preInstall/copyrightAndChanges/CMakeLists.txt
@@ -0,0 +1,29 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+#
+# Configuration file for the COPYRIGHT and Changes file installation
+#
+# author Roman Petrovski
+#
+
+# Installing top level components
+foreach(topfile ChangeLog.txt COPYRIGHT.txt LICENSE.txt)
+    install(FILES "${CMAKE_SOURCE_DIR}/${topfile}" DESTINATION "${THIS_DATADIR}")
+endforeach()
diff --git a/src/demo/CMakeLists.txt b/src/demo/CMakeLists.txt
new file mode 100644
index 0000000..ea73cff
--- /dev/null
+++ b/src/demo/CMakeLists.txt
@@ -0,0 +1,35 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## file CMakeLists.txt
+##
+################################################################################
+
+include ("${THIS_GLOBALS_CMAKE}")
+
+set (THIS_MANTADEMODIR "${THIS_DEMODIR}/manta")
+install(FILES "README.md" DESTINATION ${THIS_MANTADEMODIR})
+install(DIRECTORY "expectedResults" DESTINATION ${THIS_MANTADEMODIR})
+
+install(FILES "runMantaWorkflowDemo.py"  DESTINATION ${THIS_BINDIR} PERMISSIONS ${THIS_EXECUTABLE_PERMISSIONS})
+
+add_subdirectory (data)
+
diff --git a/src/demo/README.md b/src/demo/README.md
new file mode 100644
index 0000000..552b7b2
--- /dev/null
+++ b/src/demo/README.md
@@ -0,0 +1,23 @@
+Manta Workflow Demo
+-------------------
+
+This directory contains a small dataset which can be used to verify
+correct installation and demonstrate basic elements of the
+workflow. To run the demonstration, run the demo script found in the
+installation bin directory:
+
+```
+python ${MANTA_INSTALL_PATH}/bin/runMantaWorkflowDemo.py
+```
+
+This script creates a `MantaDemoAnalysis` directory under the current
+working directory, runs Manta on a small demo dataset, and compares
+the somatic structural variant output to an expected result.
+
+The demo data contain reads from HCC1954/HCC1954BL mapped in the
+vicinity of somatic translocation breakends corresponding to COSMIC
+variant [COST16011][1]. The demo sequencing data is extracted from
+[TCGA Benchmark 4][2].
+
+[1]:http://grch37-cancer.sanger.ac.uk/cosmic/rearrangement/overview?id=16011
+[2]:https://cghub.ucsc.edu/datasets/benchmark_download.html
diff --git a/src/demo/data/CMakeLists.txt b/src/demo/data/CMakeLists.txt
new file mode 100644
index 0000000..cbce860
--- /dev/null
+++ b/src/demo/data/CMakeLists.txt
@@ -0,0 +1,39 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+################################################################################
+##
+## file CMakeLists.txt
+##
+################################################################################
+
+set(FA_FILE "Homo_sapiens_assembly19.COST16011_region.fa")
+set(FA_ARCHIVE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FA_FILE}.tar.bz2)
+execute_process(
+    COMMAND ${CMAKE_COMMAND} -E tar xjf ${FA_ARCHIVE_FILE}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    )
+
+file (GLOB DATA_LIST *)
+list (REMOVE_ITEM DATA_LIST ${FA_ARCHIVE_FILE})
+list (REMOVE_ITEM DATA_LIST ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt)
+list (APPEND DATA_LIST ${CMAKE_CURRENT_BINARY_DIR}/${FA_FILE})
+
+install(FILES ${DATA_LIST} DESTINATION ${THIS_MANTADEMODIR}/data)
+
diff --git a/src/demo/data/G15512.HCC1954.1.COST16011_region.bam b/src/demo/data/G15512.HCC1954.1.COST16011_region.bam
new file mode 100755
index 0000000..27765c4
Binary files /dev/null and b/src/demo/data/G15512.HCC1954.1.COST16011_region.bam differ
diff --git a/src/demo/data/G15512.HCC1954.1.COST16011_region.bam.bai b/src/demo/data/G15512.HCC1954.1.COST16011_region.bam.bai
new file mode 100755
index 0000000..d9079c5
Binary files /dev/null and b/src/demo/data/G15512.HCC1954.1.COST16011_region.bam.bai differ
diff --git a/src/demo/data/HCC1954.NORMAL.30x.compare.COST16011_region.bam b/src/demo/data/HCC1954.NORMAL.30x.compare.COST16011_region.bam
new file mode 100755
index 0000000..f600ccd
Binary files /dev/null and b/src/demo/data/HCC1954.NORMAL.30x.compare.COST16011_region.bam differ
diff --git a/src/demo/data/HCC1954.NORMAL.30x.compare.COST16011_region.bam.bai b/src/demo/data/HCC1954.NORMAL.30x.compare.COST16011_region.bam.bai
new file mode 100755
index 0000000..9e68a28
Binary files /dev/null and b/src/demo/data/HCC1954.NORMAL.30x.compare.COST16011_region.bam.bai differ
diff --git a/src/demo/data/Homo_sapiens_assembly19.COST16011_region.fa.fai b/src/demo/data/Homo_sapiens_assembly19.COST16011_region.fa.fai
new file mode 100755
index 0000000..148bd67
--- /dev/null
+++ b/src/demo/data/Homo_sapiens_assembly19.COST16011_region.fa.fai
@@ -0,0 +1,2 @@
+8	146364022	3	60	61
+11	135006516	148803430	60	61
diff --git a/src/demo/data/Homo_sapiens_assembly19.COST16011_region.fa.tar.bz2 b/src/demo/data/Homo_sapiens_assembly19.COST16011_region.fa.tar.bz2
new file mode 100644
index 0000000..5325c43
Binary files /dev/null and b/src/demo/data/Homo_sapiens_assembly19.COST16011_region.fa.tar.bz2 differ
diff --git a/src/demo/expectedResults/somaticSV.vcf.gz b/src/demo/expectedResults/somaticSV.vcf.gz
new file mode 100644
index 0000000..9639a30
Binary files /dev/null and b/src/demo/expectedResults/somaticSV.vcf.gz differ
diff --git a/src/demo/runMantaWorkflowDemo.py b/src/demo/runMantaWorkflowDemo.py
new file mode 100644
index 0000000..3951081
--- /dev/null
+++ b/src/demo/runMantaWorkflowDemo.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Execute small manta demonstration/verification run
+"""
+
+def main() :
+    import os,sys
+
+    import gzip
+    import subprocess
+
+    #
+    # initialize paths:
+    #
+    scriptDir=os.path.abspath(os.path.dirname(__file__))
+    demoDir=os.path.abspath(os.path.join(scriptDir,os.pardir,"share","demo","manta"))
+    dataDir=os.path.join(demoDir,"data")
+    expectedDir=os.path.join(demoDir,"expectedResults")
+
+    analysisDir="MantaDemoAnalysis"
+
+    configScript=os.path.join(scriptDir,"configManta.py")
+
+    # error logging
+    #
+    logfp = sys.stderr
+
+
+    if not os.path.exists(configScript) :
+        logfp.write("\n")
+        logfp.write("ERROR: Manta workflow must be installed prior to running demo.\n")
+        logfp.write("\n")
+        sys.exit(2)
+
+    #
+    # Step 1: configure demo
+    #
+    if os.path.exists(analysisDir) :
+        logfp.write("\n")
+        logfp.write("ERROR: Demo analysis directory already exists: '" + analysisDir + "'\n")
+        logfp.write("       Please remove/move this to rerun demo.\n")
+        logfp.write("\n")
+        sys.exit(2)
+
+    cmd  = "\"%s\"" % (configScript)
+    cmd += " --normalBam=\"%s\"" % (os.path.join(dataDir,"HCC1954.NORMAL.30x.compare.COST16011_region.bam"))
+    cmd += " --tumorBam=\"%s\"" % (os.path.join(dataDir,"G15512.HCC1954.1.COST16011_region.bam"))
+    cmd += " --referenceFasta=\"%s\"" % (os.path.join(dataDir,"Homo_sapiens_assembly19.COST16011_region.fa"))
+    cmd += " --region=8:107652000-107655000"
+    cmd += " --region=11:94974000-94989000"
+    cmd += " --candidateBins=4"
+    cmd += " --exome"
+    cmd += " --runDir=\"%s\"" % (analysisDir)
+
+    logfp.write("\n")
+    logfp.write("**** Starting demo configuration and run.\n")
+    logfp.write("**** Configuration cmd: %s\n" % (cmd))
+    logfp.write("\n")
+
+    config_retval=subprocess.call(cmd,shell=True)
+
+    if config_retval != 0 :
+        logfp.write("\n")
+        logfp.write("ERROR: Demo configuration step failed\n")
+        logfp.write("\n")
+        sys.exit(1)
+    else :
+        logfp.write("\n")
+        logfp.write("**** Completed demo configuration.\n")
+        logfp.write("\n")
+
+    #
+    # Step 2: run demo (on single local core):
+    #
+    cmd=[sys.executable,"-E",os.path.join(analysisDir,"runWorkflow.py"),"-m","local","-j","1","-g","4"]
+
+    logfp.write("\n")
+    logfp.write("**** Starting demo workflow execution.\n")
+    logfp.write("**** Workflow cmd: '%s'\n" % (" ".join(cmd)))
+    logfp.write("\n")
+
+    run_retval=subprocess.call(cmd)
+
+    if run_retval != 0 :
+        logfp.write("\n")
+        logfp.write("ERROR: Workflow execution step failed\n")
+        logfp.write("\n")
+        sys.exit(1)
+    else :
+        logfp.write("\n")
+        logfp.write("**** Completed demo workflow execution.\n")
+        logfp.write("\n")
+
+
+    #
+    # Step 3: Compare results to expected calls
+    #
+    resultsDir=os.path.join(analysisDir,"results","variants")
+    logfp.write("\n")
+    logfp.write("**** Starting comparison to expected results.\n")
+    logfp.write("**** Expected results dir: '%s'\n" % (expectedDir))
+    logfp.write("**** Demo results dir: '%s'\n" % (resultsDir))
+    logfp.write("\n")
+
+    import re
+    rexclude = re.compile("##(fileDate|source|startTime|reference|cmdline)")
+
+    def rstream(f) :
+        """
+        stream filtered lines from gzipped manta vcf
+        """
+
+        def rstreamFilter(line) :
+            """
+            predicate defining manta output lines to ignore
+            """
+            return (rexclude.match(line) is not None)
+
+        rfp = gzip.open(f)
+        for line in rfp :
+            if rstreamFilter(line) : continue
+            yield line
+
+    import difflib
+
+    for f in os.listdir(expectedDir) :
+        efile=os.path.join(expectedDir,f)
+        rfile=os.path.join(resultsDir,f)
+        if not os.path.isfile(efile): continue
+
+        udiff_output = difflib.unified_diff(list(rstream(efile)),list(rstream(rfile)),fromfile=efile,tofile=rfile,n=0)
+        udiff_str = "".join(list(udiff_output))
+
+        if len(udiff_str) :
+            logfp.write(udiff_str)
+
+            logfp.write("\n")
+            logfp.write("\n")
+            logfp.write("ERROR: Found difference between demo and expected results in file '%s'.\n" % (f))
+            logfp.write("       Expected file: '%s'\n" % (efile))
+            logfp.write("       Demo results file: '%s'\n" % (rfile))
+            logfp.write("\n")
+            logfp.write("\n")
+
+            sys.exit(1)
+
+    logfp.write("\n")
+    logfp.write("**** No differences between expected and computed results.\n")
+    logfp.write("\n")
+
+    logfp.write("\n")
+    logfp.write("**** Demo/verification successfully completed\n")
+    logfp.write("\n")
+
+
+
+main()
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
new file mode 100644
index 0000000..0bcac8f
--- /dev/null
+++ b/src/python/CMakeLists.txt
@@ -0,0 +1,22 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+add_subdirectory (bin)
+add_subdirectory (lib)
+add_subdirectory (libexec)
diff --git a/src/python/bin/CMakeLists.txt b/src/python/bin/CMakeLists.txt
new file mode 100644
index 0000000..afca5a0
--- /dev/null
+++ b/src/python/bin/CMakeLists.txt
@@ -0,0 +1,35 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+set(INSTALL_TO_DIR "${THIS_BINDIR}")
+
+file(RELATIVE_PATH THIS_RELATIVE_PYTHON_LIBDIR "${INSTALL_TO_DIR}" "${THIS_PYTHON_LIBDIR}")
+
+include ("${THIS_MACROS_CMAKE}")
+
+configure_files("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" "*.py")
+configure_files("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" "*.ini")
+
+include("${THIS_GLOBALS_CMAKE}") # required for *_PERMISSIONS values
+
+install_fileglob("${CMAKE_CURRENT_BINARY_DIR}" "${INSTALL_TO_DIR}" "*.py"
+                 "${THIS_EXECUTABLE_PERMISSIONS}")
+install_fileglob("${CMAKE_CURRENT_BINARY_DIR}" "${INSTALL_TO_DIR}" "*.ini"
+                 "${THIS_LIBRARY_PERMISSIONS}")
+
diff --git a/src/python/bin/configManta.py b/src/python/bin/configManta.py
new file mode 100644
index 0000000..a690c13
--- /dev/null
+++ b/src/python/bin/configManta.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+This script configures the Manta SV analysis workflow
+"""
+
+import os,sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+scriptName=os.path.basename(__file__)
+workflowDir=os.path.abspath(os.path.join(scriptDir,"@THIS_RELATIVE_PYTHON_LIBDIR@"))
+
+sys.path.append(workflowDir)
+
+from configBuildTimeInfo import workflowVersion
+from mantaOptions import MantaWorkflowOptionsBase
+from configureUtil import BamSetChecker, groomBamList, OptParseException
+from makeRunScript import makeRunScript
+from mantaWorkflow import MantaWorkflow
+from workflowUtil import ensureDir
+
+
+
+class MantaWorkflowOptions(MantaWorkflowOptionsBase) :
+
+    def workflowDescription(self) :
+        return """Version: %s
+
+This script configures the Manta SV analysis pipeline.
+You must specify a BAM or CRAM file for at least one sample.
+""" % (workflowVersion)
+
+
+    def addWorkflowGroupOptions(self,group) :
+        group.add_option("--bam","--normalBam", type="string",dest="normalBamList",metavar="FILE", action="append",
+                         help="Normal sample BAM or CRAM file. May be specified more than once, multiple inputs will be treated as each BAM file representing a different sample. [optional] (no default)")
+        group.add_option("--tumorBam","--tumourBam", type="string",dest="tumorBamList",metavar="FILE", action="append",
+                          help="Tumor sample BAM or CRAM file. Only up to one tumor bam file accepted. [optional] (no default)")
+        group.add_option("--exome", dest="isExome", action="store_true",
+                         help="Set options for WES input: turn off depth filters")
+        group.add_option("--rna", dest="isRNA", action="store_true",
+                         help="Set options for RNA-Seq input: turn off depth filters and don't treat "
+                              "anomalous reads as SV evidence when the proper-pair bit is set.")
+        group.add_option("--unstrandedRNA", dest="isUnstrandedRNA", action="store_true",
+                         help="Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand")
+
+        MantaWorkflowOptionsBase.addWorkflowGroupOptions(self,group)
+
+
+    def addExtendedGroupOptions(self,group) :
+        group.add_option("--useExistingAlignStats",
+                         dest="useExistingAlignStats", action="store_true",
+                         help="Use pre-calculated alignment statistics.")
+        group.add_option("--useExistingChromDepths",
+                         dest="useExistingChromDepths", action="store_true",
+                         help="Use pre-calculated chromosome depths.")
+        group.add_option("--candidateBins",type="int",
+                         dest="nonlocalWorkBins", metavar="candidateBins",
+                         help="Provide the total number of tasks which candidate generation "
+                            " will be sub-divided into. (default: %default)")
+        group.add_option("--retainTempFiles",
+                         dest="isRetainTempFiles", action="store_true",
+                         help="Keep all temporary files (for workflow debugging)")
+        group.add_option("--generateEvidenceBam",
+                         dest="isGenerateSupportBam", action="store_true",
+                         help="Generate a bam of supporting reads for all SVs")
+
+        MantaWorkflowOptionsBase.addExtendedGroupOptions(self,group)
+
+
+    def getOptionDefaults(self) :
+
+        self.configScriptDir=scriptDir
+        defaults=MantaWorkflowOptionsBase.getOptionDefaults(self)
+        defaults.update({
+            'runDir' : 'MantaWorkflow',
+            'isExome' : False,
+            'isRNA' : False,
+            'isUnstrandedRNA' : False,
+            'useExistingAlignStats' : False,
+            'useExistingChromDepths' : False,
+            'isRetainTempFiles' : False,
+            'isGenerateSupportBam' : False,
+            'nonlocalWorkBins' : 256
+                          })
+        return defaults
+
+
+
+    def validateAndSanitizeExistingOptions(self,options) :
+
+        groomBamList(options.normalBamList,"normal sample")
+        groomBamList(options.tumorBamList, "tumor sample")
+
+        MantaWorkflowOptionsBase.validateAndSanitizeExistingOptions(self,options)
+
+
+
+    def validateOptionExistence(self,options) :
+
+        def safeLen(x) :
+            if x is None : return 0
+            return len(x)
+
+        if ((safeLen(options.normalBamList) == 0) and
+            (safeLen(options.tumorBamList) == 0)) :
+            raise OptParseException("No normal or tumor sample alignment files specified")
+
+        if (safeLen(options.tumorBamList) > 1) :
+            raise OptParseException("Can't accept more then one tumor sample")
+
+        if ((safeLen(options.tumorBamList) > 0) and (safeLen(options.normalBamList) > 1)) :
+            raise OptParseException("Can't accept multiple normal samples for tumor subtraction")
+
+        bcheck = BamSetChecker()
+        bcheck.appendBams(options.normalBamList,"Normal")
+        bcheck.appendBams(options.tumorBamList,"Tumor")
+        bcheck.check(options.htsfileBin,
+                     options.referenceFasta)
+
+        MantaWorkflowOptionsBase.validateOptionExistence(self,options)
+
+
+
+
+def main() :
+
+    primarySectionName="manta"
+    options,iniSections=MantaWorkflowOptions().getRunOptions(primarySectionName, version=workflowVersion)
+
+    # we don't need to instantiate the workflow object during configuration,
+    # but this is done here to trigger additional parameter validation:
+    #
+    MantaWorkflow(options,iniSections)
+
+    # generate runscript:
+    #
+    ensureDir(options.runDir)
+    scriptFile=os.path.join(options.runDir,"runWorkflow.py")
+
+    makeRunScript(scriptFile,os.path.join(workflowDir,"mantaWorkflow.py"),"MantaWorkflow",primarySectionName,iniSections)
+
+    notefp=sys.stdout
+    notefp.write("""
+Successfully created workflow run script.
+To execute the workflow, run the following script and set appropriate options:
+
+%s
+""" % (scriptFile))
+
+
+if __name__ == "__main__" :
+    main()
+
diff --git a/src/python/bin/configManta.py.ini b/src/python/bin/configManta.py.ini
new file mode 100644
index 0000000..2aba5e5
--- /dev/null
+++ b/src/python/bin/configManta.py.ini
@@ -0,0 +1,36 @@
+
+#
+# This section contains all configuration settings for the top-level manta workflow.
+#
+[manta]
+
+referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+
+# Run discovery and candidate reporting for all SVs/indels at or above this size:
+minCandidateVariantSize = 8
+
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
+minEdgeObservations = 3
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 3
+
+# After candidate identification, only score and report SVs/indels at or above this size:
+minScoredVariantSize = 51
+
+# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
+minDiploidVariantScore = 10
+
+# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
+minPassDiploidVariantScore = 20
+
+# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
+minPassDiploidGTScore = 15
+
+# somatic quality scores below this level are not included in the somatic vcf:
+minSomaticScore = 10
+
+# somatic quality scores below this level are filtered in the somatic vcf:
+minPassSomaticScore = 30
diff --git a/src/python/lib/CMakeLists.txt b/src/python/lib/CMakeLists.txt
new file mode 100644
index 0000000..c7e4461
--- /dev/null
+++ b/src/python/lib/CMakeLists.txt
@@ -0,0 +1,84 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+#
+# \author Chris Saunders
+#
+
+# final installation path:
+set(INSTALL_TO_DIR "${THIS_PYTHON_LIBDIR}")
+
+# clean staging area for all py and pyc files:
+set(PYSTAGE_DIR "${CMAKE_CURRENT_BINARY_DIR}/pystage")
+
+file(RELATIVE_PATH THIS_RELATIVE_LIBEXECDIR "${INSTALL_TO_DIR}" "${THIS_LIBEXECDIR}")
+
+set(BUILD_CONFIG_PY "configBuildTimeInfo.py")
+file(GLOB PYTHON_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/*.py")
+foreach(PYTHON_FILE ${PYTHON_FILES})
+    if (NOT ${PYTHON_FILE} STREQUAL ${BUILD_CONFIG_PY})
+        configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${PYTHON_FILE}" "${PYSTAGE_DIR}/${PYTHON_FILE}" @ONLY)
+    endif ()
+endforeach()
+
+# build-time configuration:
+#
+# ${BUILD_CONFIG_PY} needs to be configured at build time, so it is handled by the custom target below instead of the staging loop above
+#
+set (PY_BUILD_CONFIG_TARGET "${THIS_PROJECT_NAME}_python_buildtime_config")
+add_custom_target(
+    ${PY_BUILD_CONFIG_TARGET}
+    DEPENDS ${THIS_BUILDTIME_CONFIG_TARGET}
+    COMMAND ${CMAKE_COMMAND}
+    -D CONFIG_FILE=${THIS_BUILDTIME_CONFIG_FILE}
+    -D SOURCE_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${BUILD_CONFIG_PY}
+    -D DEST_FILE=${PYSTAGE_DIR}/${BUILD_CONFIG_PY}
+    -P ${THIS_MODULE_DIR}/buildTimeConfigure.cmake)
+
+
+if (PYTHONINTERP_FOUND)
+    #
+    # compile all py to pyc for:
+    # (1) build-time error-checking (primarily)
+    # (2) faster run-time response (in theory??? -- I don't observe)
+    #
+    file(GLOB STAGED_PYTHON_FILES "${PYSTAGE_DIR}/*.py")
+    foreach(PYFILE ${STAGED_PYTHON_FILES})
+        list(APPEND STAGED_PYTHONC_FILES "${PYFILE}c")
+    endforeach()
+
+    add_custom_command(
+        OUTPUT ${STAGED_PYTHONC_FILES}
+        COMMAND ${PYTHON_EXECUTABLE} -m compileall -q ${PYSTAGE_DIR}
+        COMMENT "Compiling python library source")
+
+    set(THIS_PYTHONLIB_COMPILE "${THIS_PROJECT_NAME}_pythonlib_compile")
+    add_custom_target(${THIS_PYTHONLIB_COMPILE} ALL
+        DEPENDS ${STAGED_PYTHONC_FILES}
+        DEPENDS ${PY_BUILD_CONFIG_TARGET})
+endif ()
+
+
+
+#
+# install
+#
+include("${THIS_MACROS_CMAKE}")
+install_python_lib_dir("${PYSTAGE_DIR}" "${INSTALL_TO_DIR}")
+
diff --git a/src/python/lib/checkChromSet.py b/src/python/lib/checkChromSet.py
new file mode 100644
index 0000000..1860815
--- /dev/null
+++ b/src/python/lib/checkChromSet.py
@@ -0,0 +1,166 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+import os,sys
+
+
+"""
+This module contains functions to check bam header and reference consistency
+"""
+
+
+def chromError(msg) :
+    """
+    Put this here as a placeholder for custom error handling later
+    """
+    sys.stderr.write("\n"+"CONFIGURATION ERROR:\n"+msg+"\n\n")
+    sys.exit(1)
+
+
+
+def getFastaInfo(fasta) :
+    """
+    check that fai file is properly formatted (not like the GATK bundle NCBI 37 fai files)
+
+    returns hash of chrom length
+    """
+
+    fai=fasta+".fai"
+    assert os.path.isfile(fai)
+
+    info={}
+
+    for i,line in enumerate(open(fai)) :
+        w=line.strip().split()
+        if len(w) != 5 :
+            msg  = "Unexpected format for line number '%i' of fasta index file: '%s'\n" % (i,fai)
+            msg += "\tRe-running fasta indexing may fix the issue. To do so, run: \"samtools faidx %s\"" % (fasta)
+            chromError(msg)
+        info[w[0]]=int(w[1])
+
+    return info
+
+
+
+def getBamChromInfo(htsfileBin,bam) :
+    """
+    Get chromosome information from bam/cram header
+
+    return a map of [chrom_name]=(chrom_size,chrom_order)
+    """
+
+    import subprocess
+
+    cmd="\"%s\" -h \"%s\"" % (htsfileBin,bam)
+
+    info = {}
+    chromIndex=0
+
+    proc=subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
+    for line in proc.stdout :
+        if not line.startswith("@SQ") : continue
+        w = line.strip().split('\t')
+        if len(w) < 3 :
+            chromError("Unexpected BAM/CRAM header for file '%s'" % (bam))
+
+        h = {}
+        for word in w[1:] :
+            vals=word.split(':', 1)
+            h[vals[0]] = vals[1]
+
+        key = h["SN"]
+        size = int(h["LN"])
+        if size <= 0 :
+            chromError("Unexpected chromosome size '%i' in BAM/CRAM header for file '%s'" % (size,bam))
+
+        info[key] = (size,chromIndex)
+        chromIndex += 1
+
+    proc.wait()
+    if proc.returncode != 0 :
+        chromError("Failed to pipe command: '%s'" % (cmd))
+
+    return info
+
+
+
+def checkChromSet(htsfileBin,referenceFasta,bamList,bamLabel=None,isReferenceLocked=False) :
+    """
+    Check that chromosomes in reference and input bam/cram(s) are consistent
+
+    @param htsfileBin - htsfile binary
+    @param referenceFasta - samtools indexed fasta file
+    @param bamList - a container of indexed bam/cram(s) to check for consistency
+    @param bamLabel - a container of labels for each bam/cram file (default is to label files by index number)
+    @param isReferenceLocked - if true, then the input BAMs must contain all of the chromosomes in the reference fasta
+
+    This function closely follows the strelka input configuration step validator
+    """
+
+    if len(bamList) == 0 : return
+
+    if bamLabel is None :
+        bamLabel = [ "index%i" % (x) for x in range(len(bamList)) ]
+
+    assert len(bamLabel) == len(bamList)
+
+    refChromInfo = getFastaInfo(referenceFasta)
+
+    # first bam is used as a reference:
+    chromInfo = getBamChromInfo(htsfileBin,bamList[0])
+    chroms = sorted(chromInfo.keys(),key=lambda x:chromInfo[x][1])
+
+    # check that first bam is compatible with reference:
+    for chrom in chroms :
+        isError=False
+        if chrom not in refChromInfo :
+            isError = True
+        else :
+            if refChromInfo[chrom] != chromInfo[chrom][0] :
+                isError = True
+
+        if isError :
+            chromError("Reference fasta and '%s' BAM/CRAM file conflict on chromosome: '%s'" % (bamLabel[0],chrom))
+
+    # optionally check that BAM contains all chromosomes in reference:
+    if isReferenceLocked :
+        for refChrom in refChromInfo.keys() :
+            if refChrom not in chroms :
+                chromError("'%s' BAM/CRAM file is missing reference fasta chromosome: '%s'" % (bamLabel[0],refChrom))
+
+    # check that other bams are compatible with first bam:
+    for index in range(1,len(bamList)) :
+        compareChromInfo=getBamChromInfo(htsfileBin,bamList[index])
+        for chrom in chroms:
+            isError=False
+            if not chrom in compareChromInfo :
+                isError=True
+            else :
+                (ln,order) = chromInfo[chrom]
+                (tln,torder) = compareChromInfo[chrom]
+                if ln != tln or order != torder : isError=True
+
+            if isError :
+                chromError("'%s' and '%s' BAM/CRAM files have a conflict on chromosome: '%s'" % (bamLabel[0],bamLabel[index],chrom))
+
+            del compareChromInfo[chrom]
+
+        # check that no chromosomes are unique to the tumor:
+        for chrom in compareChromInfo.keys() :
+            chromError("'%s' and '%s' BAM/CRAM files have a conflict on chromosome: '%s'" % (bamLabel[0],bamLabel[index],chrom))
diff --git a/src/python/lib/configBuildTimeInfo.py b/src/python/lib/configBuildTimeInfo.py
new file mode 100644
index 0000000..b206620
--- /dev/null
+++ b/src/python/lib/configBuildTimeInfo.py
@@ -0,0 +1,27 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+This consolidates build-time config data such as git status
+and build date. This is in contrast to cmake configuration-time
+config data like relative paths and library/header availability.
+"""
+
+workflowVersion="@WORKFLOW_VERSION@"
+buildTime="@BUILD_TIME@"
diff --git a/src/python/lib/configureOptions.py b/src/python/lib/configureOptions.py
new file mode 100644
index 0000000..47a748c
--- /dev/null
+++ b/src/python/lib/configureOptions.py
@@ -0,0 +1,246 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+import os,sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+scriptName=os.path.basename(__file__)
+
+from configureUtil import dumpIniSections, getIniSections, OptParseException
+
+
+
+def noArgOrError(parser,msg) :
+    if len(sys.argv) <= 1 :
+        parser.print_help()
+        sys.exit(2)
+    else :
+        parser.error(msg)
+
+
+
+class ConfigureWorkflowOptions(object) :
+    """
+    This class consolidates common configuration functions
+    for setting up workflows, specific configurations
+    overload the indicated functions to gather/validate
+    specific parameter sets for their workflow.
+    """
+
+    # override methods;
+
+    def workflowDescription(self) :
+        """
+        brief description of the workflow to appear in usage
+        """
+        return ""
+
+    def addWorkflowGroupOptions(self,group) :
+        """
+        add options to OptionsGroup object which specify
+        parameters which commonly change from run to run
+        """
+        pass
+
+    def addExtendedGroupOptions(self,group) :
+        """
+        This options are expected to change less frequently and
+        should live in the ini file, they will not appear if a
+        default exists.
+        """
+        pass
+
+    def getOptionDefaults(self) :
+        """
+        Provide any option defaults. This is a good place to specify
+        parameters which probably don't need to be set from their
+        default. This is not a great place for anything that would
+        be site-specific configuration.
+        """
+        return {}
+
+    def validateAndSanitizeExistingOptions(self,options) :
+        """
+        validate any arguments in options which are not None, and
+        optionally sanitize them. This validation step is run
+        *before* writing the ini file
+        """
+        pass
+
+    def validateOptionExistence(self,options) :
+        """
+        validate that all required options actually exist
+
+        this method is run *after* writing template ini file
+        """
+        pass
+
+
+    # public methods:
+
+    def getRunOptions(self, primary_section, version = None, configHelp=None) :
+        """
+        primary client code interface to the finished product.
+        do not override this method
+
+        This returns a tuple of the (1) a class holding all of the
+        primary run options gathered from the primary section of the ini
+        file and command-line options and (2) an inifile hash-of-hashes
+        reflecting all sections of the ini file.
+        """
+
+        def updateIniSections(data,newData) :
+            for k in newData.keys() :
+                if k not in data : data[k] = {}
+                for kk in newData[k].keys() :
+                    data[k][kk] = newData[k][kk]
+
+
+        # first level of options are those hard coded into the python code as defaults,
+        # these have the lowest precedence:
+        #
+        iniSections = { primary_section : self.getOptionDefaults() }
+
+        # next is the 'global' ini file, in the same directory as the configure
+        # script:
+        cmdlineScriptName=os.path.basename(sys.argv[0])
+        configFileName=cmdlineScriptName+".ini"
+
+        cmdlineScriptDir=os.path.abspath(os.path.dirname(sys.argv[0]))
+        globalConfigPath=os.path.join(cmdlineScriptDir,configFileName)
+        updateIniSections(iniSections,getIniSections(globalConfigPath))
+
+        parser=self._getOptionParser(iniSections[primary_section],configFileName, cmdlineScriptDir,
+                                     version=version, configHelp=configHelp)
+        (options,args) = parser.parse_args()
+
+        if options.userConfigPath :
+            if not os.path.isfile(options.userConfigPath) :
+                raise OptParseException("Can't find config file: '%s'" % (options.userConfigPath))
+
+            updateIniSections(iniSections,getIniSections(options.userConfigPath))
+
+            # reparse with updated default values:
+            parser=self._getOptionParser(iniSections[primary_section],configFileName, cmdlineScriptDir,
+                                         version=version, configHelp=configHelp)
+            (options,args) = parser.parse_args()
+
+        if options.isAllHelp :
+            # this second call to getOptionParser is only here to provide the extended help option:
+            parser=self._getOptionParser(iniSections[primary_section],configFileName, cmdlineScriptDir, True,
+                                         version=version, configHelp=configHelp)
+            parser.print_help()
+            sys.exit(2)
+
+        if len(args) : # or (len(sys.argv) == 1):
+            parser.print_help()
+            sys.exit(2)
+
+        try :
+            # sanitize arguments before writing defaults, check for missing arguments after:
+            #
+            self.validateAndSanitizeExistingOptions(options)
+
+            # write options object back into full iniSections object:
+            #
+            for k,v in vars(options).iteritems() :
+                if k == "isAllHelp" : continue
+                iniSections[primary_section][k] = v
+
+            self.validateOptionExistence(options)
+
+        except OptParseException, e :
+            noArgOrError(parser,str(e))
+
+        return options,iniSections
+
+
+
+    # private methods:
+
+    def _getOptionParser(self, defaults,configFileName, globalConfigDir, isAllHelp=False, version=None, configHelp=None) :
+        from optparse import OptionGroup, OptionParser, SUPPRESS_HELP
+
+        description=self.workflowDescription()+"""
+Configuration will produce a workflow run script which
+can execute the workflow on a single node or through
+sge and resume any interrupted execution.
+"""
+
+        globalConfigFile=os.path.join(globalConfigDir,configFileName)
+
+#        epilog="""Default parameters are read from the global config file '%s'
+#. Any values in the global config file can be overridden by submitting a
+#config file as a configuration argument (see --config flag). Any settings in
+#the config argument file update and take precedence over global config settings
+#in case of a repeated entry.
+#""" % (globalConfigFile)
+
+        # TODO: document why we need this format_description override?
+        class MyOptionParser(OptionParser) :
+            def format_description(self, formatter) :
+                 return self.description
+
+        parser = MyOptionParser(description=description, version=version)
+
+        parser.set_defaults(**defaults)
+
+        defaultConfigHelp="provide a configuration file to override defaults in global config file (%s)" % (globalConfigFile)
+        if configHelp is None :
+            configHelp = defaultConfigHelp
+
+        parser.add_option("--config", dest="userConfigPath",type="string", metavar="FILE",
+                          help=configHelp)
+        parser.add_option("--allHelp", action="store_true",dest="isAllHelp",
+                          help="show all extended/hidden options")
+
+        group = OptionGroup(parser,"Workflow options")
+        self.addWorkflowGroupOptions(group)
+        parser.add_option_group(group)
+
+        class Hack(object) : isAnyHelp=False
+
+        class MaybeHelpOptionGroup(OptionGroup) :
+            """
+            This extends option group to optionally hide all help
+            """
+            def add_option(self,*args,**kwargs) :
+                if (not isAllHelp) and \
+                   ('dest' in kwargs) and \
+                   (kwargs['dest'] in defaults) :
+                    kwargs['help'] = SUPPRESS_HELP
+                else :
+                    Hack.isAnyHelp=True
+                OptionGroup.add_option(self,*args, **kwargs)
+
+
+        secgroup = MaybeHelpOptionGroup(parser,"Extended options",
+                                        "These options are either unlikely to be reset after initial site configuration or only of interest for workflow development/debugging. They will not be printed here if a default exists unless --allHelp is specified")
+
+
+        def hideGroup(group) :
+            group.title=group.title+" (hidden)"
+            group.description=None
+
+        self.addExtendedGroupOptions(secgroup)
+        if not Hack.isAnyHelp: hideGroup(secgroup)
+        parser.add_option_group(secgroup)
+
+        return parser
+
diff --git a/src/python/lib/configureUtil.py b/src/python/lib/configureUtil.py
new file mode 100644
index 0000000..6259a63
--- /dev/null
+++ b/src/python/lib/configureUtil.py
@@ -0,0 +1,305 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+util -- simple utilities shared by workflow configurations
+"""
+
+
+import os.path
+
+from optparse import OptionParser
+from checkChromSet import checkChromSet
+
+
+
+class OptParseException(Exception):
+    pass
+
+
+
+def argToBool(x) :
+    """
+    convert argument of unknown type to a bool:
+    """
+    class FalseStrings :
+        val = ("", "0", "false", "f", "no", "n", "off")
+
+    if isinstance(x, basestring) :
+        return (x.lower() not in FalseStrings.val)
+    return bool(x)
+
+
+
+def safeSetBool(obj,dataname) :
+    """
+    translate ojb.dataname to a bool, or set to false if it doesn't exist
+    """
+    if hasattr(obj, dataname) :
+        setattr(obj, dataname, argToBool(getattr(obj, dataname)))
+    else :
+        setattr(obj, dataname, False)
+
+
+
+def pickleConfigSections(pickleConfigFile, configSections) :
+    """
+    write configSections object, expected to be a hash or hashes, into a pickle file
+    """
+    import pickle
+
+    pickle.dump(configSections, open(pickleConfigFile, "w"))
+
+
+
+def getConfigSections(pickleConfigFile) :
+    """
+    deserialize the config file and return a hash of hashes
+    """
+
+    import pickle
+
+    if not os.path.isfile(pickleConfigFile) : return {}
+
+    return pickle.load(open(pickleConfigFile))
+
+
+
+def getPrimarySectionOptions(configSections,primarySection) :
+
+    class WorkflowOptions(object) :
+        pass
+
+    options=WorkflowOptions()
+    if primarySection not in configSections : return options
+    for (k,v) in configSections[primarySection].items() :
+        setattr(options,k,v)
+
+    return options
+
+
+
+def getConfigWithPrimaryOptions(pickleConfigFile,primarySection) :
+    """
+    Deserialize the config pickle file and return (1) a class representing the
+    options of a section specified as primary (2) a hash of hashes representing
+    all sections
+    """
+    configSections=getConfigSections(pickleConfigFile)
+    options=getPrimarySectionOptions(configSections,primarySection)
+
+    return (options,configSections)
+
+
+
+def dumpIniSections(iniFile,iniSections) :
+    """
+    convert iniSections object, expected to be a hash or hashes, into an iniFile
+    """
+    from ConfigParser import SafeConfigParser
+
+    config = SafeConfigParser()
+    config.optionxform=str
+
+    def clean_value(v) :
+        if v is None : return ""
+        else :         return str(v)
+
+    for section in iniSections.keys():
+        config.add_section(section)
+        for k,v in iniSections[section].items() :
+            config.set(section,k,clean_value(v))
+
+    configfp=open(iniFile,"w")
+    config.write(configfp)
+    configfp.close()
+
+
+
+def getIniSections(iniFile) :
+    """
+    parse the ini iniFile and return a hash of hashes
+    """
+    from ConfigParser import SafeConfigParser
+
+    if not os.path.isfile(iniFile) : return {}
+
+    config = SafeConfigParser()
+    config.optionxform=str
+    config.read(iniFile)
+
+    iniSections = {}
+    for section in config.sections() :
+        iniSections[section] = {}
+        for (k,v) in config.items(section) :
+            if v == "" : v = None
+            iniSections[section][k] = v
+
+    return iniSections
+
+
+
+class EpilogOptionParser(OptionParser) :
+    """
+    This extension to OptionParser fakes the epilog feature introduced
+    in versions of OptionParser after python 2.4
+    """
+    def __init__(self, *args, **kwargs):
+        self.myepilog = None
+        try:
+            self.myepilog = kwargs.pop('epilog')
+        except KeyError:
+            pass
+        OptionParser.__init__(self,*args, **kwargs)
+
+    def print_help(self,*args,**kwargs) :
+        import sys,textwrap
+        OptionParser.print_help(self,*args, **kwargs)
+        if self.myepilog is not None :
+            sys.stdout.write("\n%s\n\n" % (textwrap.fill(self.myepilog)))
+
+
+
+def _validateFixArgHelper(x,label,checkfunc) :
+    if x is not None:
+        x=os.path.abspath(x)
+        if not checkfunc(x) :
+            raise OptParseException("Can't find %s: '%s'" % (label,x))
+    return x
+
+def validateFixExistingDirArg(argDir,label) :
+    """
+    convert directory arg to absolute path and check that it exists
+    """
+    return _validateFixArgHelper(argDir,label,os.path.isdir)
+
+def validateFixExistingFileArg(argFile,label) :
+    """
+    convert file arg to absolute path and check that it exists
+    """
+    return _validateFixArgHelper(argFile,label,os.path.isfile)
+
+
+def checkTabixIndexedFile(iname,label) :
+    assert(iname is not None)
+    tabixFile = iname + ".tbi"
+    if os.path.isfile(tabixFile) : return
+    raise OptParseException("Can't find expected %s index file: '%s'" % (label,tabixFile))
+
+
+def checkOptionalTabixIndexedFile(iname,label) :
+    """
+    if iname is not none, then we expect an tabix tbi file to accompany it, raise an exception otherwise
+    """
+    if iname is None : return
+    checkTabixIndexedFile(iname,label)
+
+
+def checkTabixListOption(opt,label) :
+    """
+    check a list of files which are expected to be tabix indexed
+    """
+    if opt is None : return
+    for val in opt :
+        checkTabixIndexedFile(val,label)
+
+
+def checkForBamIndex(bamFile):
+    """
+    make sure bam file has an index
+    """
+    # check for multi-extension index format PREFIX.bam -> PREFIX.bam.bai:
+    for ext in (".bai", ".csi", ".crai") :
+        indexFile=bamFile + ext
+        if os.path.isfile(indexFile) : return
+
+    # check for older short index format PREFIX.bam -> PREFIX.bai:
+    for (oldSuffix,newSuffix) in [ (".bam",".bai") ] :
+        if not bamFile.endswith(oldSuffix) : continue
+        indexFile=bamFile[:-len(oldSuffix)] + newSuffix
+        if os.path.isfile(indexFile) : return
+
+    raise OptParseException("Can't find any expected BAM/CRAM index files for: '%s'" % (bamFile))
+
+
+def groomBamList(bamList, sampleLabel):
+    """
+    check that bam/cram files exist and have an index, convert to abs path if they check out
+    """
+    if bamList is None : return
+    for (index,bamFile) in enumerate(bamList) :
+        bamList[index]=validateFixExistingFileArg(bamFile,"%s BAM/CRAM file" % (sampleLabel))
+        checkForBamIndex(bamList[index])
+
+
+class BamSetChecker(object):
+    """
+    check properties of the input bams as an aggregate set
+
+    for instance, same chrom order, no repeated files, etc...
+    """
+
+    def  __init__(self) :
+        self.bamList=[]
+        self.bamLabels=[]
+
+    def appendBams(self,inputBamList,inputLabel) :
+
+        if inputBamList is None : return
+        for inputBamFile in inputBamList :
+            self.bamList.append(inputBamFile)
+            self.bamLabels.append(inputLabel)
+
+    def check(self, htsfileBin, referenceFasta) :
+
+        checkChromSet(htsfileBin,
+                      referenceFasta,
+                      self.bamList,
+                      self.bamLabels,
+                      isReferenceLocked=True)
+
+        # check for repeated bam entries:
+        #
+        bamSet=set()
+        for bamFile in self.bamList :
+            if bamFile in bamSet :
+                raise OptParseException("Repeated input BAM/CRAM file: %s" % (bamFile))
+            bamSet.add(bamFile)
+
+
+def checkListArgRepeats(listName,itemLabel) :
+    """
+    screen a list argument for repeated entries
+    """
+    if listName is None : return
+    if len(set(listName)) != len(listName) :
+        raise OptParseException("Repeated %s entries" % (itemLabel))
+
+
+
+def assertOptionExists(arg,label) :
+    if arg is None:
+        raise OptParseException("No %s specified" % (label))
+
+
+
+def joinFile(*arg) :
+    filePath = os.path.join(*arg)
+    assert os.path.isfile(filePath)
+    return filePath
diff --git a/src/python/lib/estimateHardware.py b/src/python/lib/estimateHardware.py
new file mode 100644
index 0000000..a9a96fe
--- /dev/null
+++ b/src/python/lib/estimateHardware.py
@@ -0,0 +1,174 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+get cpu and memory capability info from linux and os x hosts
+"""
+
+
+import os
+
+
+
+class EstException(Exception):
+    pass
+
+
+
+def getNodeRealCoreCount() :
+    """
+    find the number of physical cpu cores (linux logic adapted from B Sickler)
+
+    only works on linux and os x
+
+    raises EstException on any other OS, or when the expected cpu info is missing
+    """
+
+    import platform
+    if platform.system().find("Linux") > -1:
+
+        cpuinfo = '/proc/cpuinfo'
+        if not os.path.isfile(cpuinfo):
+            raise EstException("Can't read processor information from '%s'" % (cpuinfo))
+
+        physical_core_ids = set()
+        cpu_cores = set()
+
+        for line in open(cpuinfo):
+            l_array = [item.strip() for item in line.strip().split(': ')]
+            if len(l_array) < 2: continue
+            cpuid = l_array[0]
+            if cpuid == 'cpu cores':
+                cpu_cores.add(int(l_array[1]))
+            if cpuid == 'physical id':
+                physical_core_ids.add(l_array[1])
+
+        if len(physical_core_ids) == 0 :
+            raise EstException("No 'physical id' key found in '%s'" % (cpuinfo))
+        if len(cpu_cores) == 0 :
+            raise EstException("No 'cpu cores' key found in '%s'" % (cpuinfo))
+        # distinct 'physical id' values times one of the reported 'cpu cores' values:
+        return ( len(physical_core_ids) * cpu_cores.pop() )
+
+    elif platform.system().find("Darwin") > -1:
+        import subprocess
+        cmd=['sysctl', '-n', 'hw.physicalcpu']
+        proc=subprocess.Popen(cmd,shell=False,stdout=subprocess.PIPE)
+        for line in proc.stdout :
+            return int(line.strip())
+
+        # no output from sysctl: report it as an estimation failure
+        raise EstException("Can't read physical core count from '%s'" % (" ".join(cmd)))
+    else:
+        raise EstException("Can't determine total physical cores available on OS: '%s'" % (platform.system()))
+
+
+
+
+def getNodeHyperthreadCoreCount():
+    """
+    return the number of hyperthread (or 'logical') cores on this host
+
+    linux logic taken from R Kelley's function in IsisWorkflow
+
+    works on linux, os x and windows; raises EstException on any other OS
+    """
+
+    cpuCount = 0
+
+    import platform
+
+    if platform.system().find("Linux") > -1:
+        cname="/proc/cpuinfo"
+        if not os.path.isfile(cname):
+            raise EstException("Can't read processor information from %s" % (cname))
+
+        for line in open(cname):
+            if line.startswith("processor"): cpuCount += 1
+
+        if cpuCount == 0: raise EstException("Can't estimate processor core count from %s" % (cname))
+
+    elif platform.system().find("Darwin") > -1:
+        import subprocess
+        cmd=['sysctl', '-n', 'hw.logicalcpu']
+        proc=subprocess.Popen(cmd,shell=False,stdout=subprocess.PIPE)
+        for line in proc.stdout :
+            cpuCount=int(line.strip())
+            break
+    elif platform.system().find("Windows") > -1:
+        import multiprocessing
+        cpuCount = multiprocessing.cpu_count()
+    else:
+        raise EstException("Can't determine total logical cores available on OS: '%s'" % (platform.system()))
+
+    return cpuCount
+
+
+
+def getNodeMemMb():
+    """
+    return total memory in Mbytes
+
+    linux logic taken from R Kelley's function in IsisWorkflow
+
+    works on linux, os x and windows; raises EstException on any other OS
+    """
+
+    memMb = 0
+
+    import platform
+
+    if platform.system().find("Linux") > -1:
+        # get this from the first line of /proc/meminfo, which is expected to
+        # hold 3 fields with the total memory (in kB) as the second field
+        #
+        mname="/proc/meminfo"
+        if not os.path.isfile(mname):
+            raise EstException("Can't read memory information from %s" % (mname))
+
+        line = open(mname).readline()
+        splat = line.rstrip().split()
+        if len(splat) != 3:
+            raise EstException("Unexpected format in %s" % (mname))
+
+        try:
+            memMb = 1+((int(splat[1])-1)/1024)
+        except ValueError:
+            raise EstException("Unexpected format in %s" % (mname))
+    elif platform.system().find("Darwin") > -1:
+        import subprocess
+        cmd=['sysctl', '-n', 'hw.memsize']
+        proc=subprocess.Popen(cmd,shell=False,stdout=subprocess.PIPE)
+        for line in proc.stdout :
+            memMb=int(line.strip())/(1024*1024)
+            break
+    elif platform.system().find("Windows") > -1:
+        process = os.popen('wmic memorychip get capacity')
+        result = process.read()
+        process.close()
+        totalMem = 0
+        for m in result.split("  \r\n")[1:-1]:
+            totalMem += int(m)
+        memMb = totalMem / (1024**2)
+    else:
+        raise EstException("Can't determine total memory available on OS: '%s'" % (platform.system()))
+
+    return memMb
+
+
diff --git a/src/python/lib/makeRunScript.py b/src/python/lib/makeRunScript.py
new file mode 100644
index 0000000..1a9654b
--- /dev/null
+++ b/src/python/lib/makeRunScript.py
@@ -0,0 +1,290 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+This provides a function to auto-generate a workflow run script.
+"""
+
+import os, sys
+
+from configureUtil import pickleConfigSections
+
+
+
+def makeRunScript(scriptFile, workflowModulePath, workflowClassName, primaryConfigSection, configSections, pythonBin=None) :
+    """
+    This function generates the python workflow runscript
+
+    The auto-generated python script presents the user with options to
+    run and/or continue their workflow, and reads all workflow
+    configuration info from a pickle file (scriptFile + ".config.pickle").
+
+    scriptFile -- file name of the runscript to create
+    workflowModulePath -- the python module containing the workflow class
+    workflowClassName -- the workflow class name
+    primaryConfigSection -- the section used to create the primary workflow parameter object
+    configSections -- a hash or hashes representing all configuration info
+    @param pythonBin: optionally specify a custom python interpreter for the script she-bang (default: /usr/bin/env python)
+    """
+
+    assert os.path.isdir(os.path.dirname(scriptFile))
+    assert os.path.isfile(workflowModulePath)
+
+    workflowModulePath=os.path.abspath(workflowModulePath)
+    workflowModuleDir=os.path.dirname(workflowModulePath)
+    workflowModuleName=os.path.basename(workflowModulePath)
+    pyExt=".py"
+    if workflowModuleName.endswith(pyExt) :
+        workflowModuleName=workflowModuleName[:-len(pyExt)]
+
+    # dump inisections to a pickle file; its path is passed to main() in the generated script
+    pickleConfigFile=scriptFile+".config.pickle"
+    pickleConfigSections(pickleConfigFile,configSections)
+
+    sfp=open(scriptFile,"w")
+
+    if pythonBin is None :
+        pythonBin="/usr/bin/env python"
+
+    sfp.write(runScript1 % (pythonBin, " ".join(sys.argv),workflowModuleDir,workflowModuleName,workflowClassName))
+
+    sfp.write('\n')
+    sfp.write(runScript2)
+    sfp.write('\n')
+    sfp.write(runScript3)
+    sfp.write('\n')
+
+    sfp.write('main(r"%s","%s",%s)\n' % (pickleConfigFile, primaryConfigSection, workflowClassName))
+    sfp.write('\n')
+    sfp.close()
+    os.chmod(scriptFile,0755)
+
+
+# this is the old version of makeRunScript which used reflection instead of simple text blocks. it is theoretically better,
+# but at production scale, we found that python reflection is not 100% reliable. Intermittent, hard to pin down failures in this
+# process suggest subtle python interpreter bugs in this feature.
+oldLogic="""
+    import inspect
+
+    def auditInspection(label,objectName) :
+        assert(objectName is not None)
+        sfp.write("# inspecting from %s '%s' in file " % (label,str(objectName)))
+        sfp.write(inspect.getsourcefile(objectName))
+        sfp.write('\n')
+
+
+    def inspectObject(objectName) :
+        assert(objectName is not None)
+        sfp.write('#\n')
+        auditInspection('object',objectName)
+        sfp.write('#\n')
+        sfp.write(inspect.getsource(objectName))
+        sfp.write('\n')
+
+    # make sure we're inspectnig from the current module
+    # (motivated by a rare cross-inspection bug)
+    current_module = sys.modules[__name__]
+    sfp.write('#\n')
+    auditInspection('module',current_module)
+    sfp.write('#\n')
+    sfp.write('\n')
+
+    inspectObject(current_module.get_run_options)
+    inspectObject(current_module.main)
+    sfp.write('main("%s","%s",%s)\n' % (pickleConfigFile, primaryConfigSection, workflowClassName))
+    sfp.write('\n')
+    sfp.close()
+    os.chmod(scriptFile,0755)
+"""
+
+
+runScript1="""#!%s
+# Workflow run script auto-generated by command: '%s'
+#
+
+import os, sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(r'%s')
+
+from %s import %s
+
+"""
+
+
+runScript2="""
+def get_run_options(workflowClassName) :
+
+    from optparse import OptionGroup, SUPPRESS_HELP
+
+    from configBuildTimeInfo import workflowVersion
+    from configureUtil import EpilogOptionParser
+    from estimateHardware import EstException, getNodeHyperthreadCoreCount, getNodeMemMb
+
+    sgeDefaultCores=workflowClassName.runModeDefaultCores('sge')
+
+    epilog=\"\"\"Note this script can be re-run to continue the workflow run in case of interruption.
+Also note that dryRun option has limited utility when task definition depends on upstream task
+results -- in this case the dry run will not cover the full 'live' run task set.\"\"\"
+
+    parser = EpilogOptionParser(description="Version: %s" % (workflowVersion), epilog=epilog, version=workflowVersion)
+
+
+    parser.add_option("-m", "--mode", type="string",dest="mode",
+                      help="select run mode (local|sge)")
+    parser.add_option("-q", "--queue", type="string",dest="queue",
+                      help="specify scheduler queue name")
+    parser.add_option("-j", "--jobs", type="string",dest="jobs",
+                  help="number of jobs, must be an integer or 'unlimited' (default: Estimate total cores on this node for local mode, %s for sge mode)" % (sgeDefaultCores))
+    parser.add_option("-g","--memGb", type="string",dest="memGb",
+                  help="gigabytes of memory available to run workflow -- only meaningful in local mode, must be an integer (default: Estimate the total memory for this node for local mode, 'unlimited' for sge mode)")
+    parser.add_option("-d","--dryRun", dest="isDryRun",action="store_true",default=False,
+                      help="dryRun workflow code without actually running command-tasks")
+    parser.add_option("--quiet", dest="isQuiet",action="store_true",default=False,
+                      help="Don't write any log output to stderr (but still write to workspace/pyflow.data/logs/pyflow_log.txt)")
+
+    def isLocalSmtp() :
+        import smtplib
+        try :
+            smtplib.SMTP('localhost')
+        except :
+            return False
+        return True
+
+    isEmail = isLocalSmtp()
+    emailHelp = SUPPRESS_HELP
+    if isEmail :
+        emailHelp="send email notification of job completion status to this address (may be provided multiple times for more than one email address)"
+
+    parser.add_option("-e","--mailTo", type="string",dest="mailTo",action="append",help=emailHelp)
+
+    debug_group = OptionGroup(parser,"development debug options")
+    debug_group.add_option("--rescore", dest="isRescore",action="store_true",default=False,
+                          help="Reset task list to re-run hypothesis generation and scoring without resetting graph generation.")
+
+    parser.add_option_group(debug_group)
+
+    ext_group = OptionGroup(parser,"extended portability options (should not be needed by most users)")
+    ext_group.add_option("--maxTaskRuntime", type="string", metavar="hh:mm:ss",
+                      help="Specify scheduler max runtime per task, argument is provided to the 'h_rt' resource limit if using SGE (no default)")
+
+    parser.add_option_group(ext_group)
+
+    (options,args) = parser.parse_args()
+
+    if not isEmail : options.mailTo = None
+
+    if len(args) :
+        parser.print_help()
+        sys.exit(2)
+
+    if options.mode is None :
+        parser.print_help()
+        sys.exit(2)
+    elif options.mode not in ["local","sge"] :
+        parser.error("Invalid mode. Available modes are: local, sge")
+
+    if options.jobs is None :
+        if options.mode == "sge" :
+            options.jobs = sgeDefaultCores
+        else :
+            try :
+                options.jobs = getNodeHyperthreadCoreCount()
+            except EstException:
+                parser.error("Failed to estimate cores on this node. Please provide job count argument (-j).")
+    if options.jobs != "unlimited" :
+        options.jobs=int(options.jobs)
+        if options.jobs <= 0 :
+            parser.error("Jobs must be 'unlimited' or an integer greater than 0")
+
+    # note that the user sees gigs, but we set megs
+    if options.memGb is None :
+        if options.mode == "sge" :
+            options.memMb = "unlimited"
+        else :
+            try :
+                options.memMb = getNodeMemMb()
+            except EstException:
+                parser.error("Failed to estimate available memory on this node. Please provide available gigabyte argument (-g).")
+    elif options.memGb != "unlimited" :
+        options.memGb=int(options.memGb)
+        if options.memGb <= 0 :
+            parser.error("memGb must be 'unlimited' or an integer greater than 0")
+        options.memMb = 1024*options.memGb
+    else :
+        options.memMb = options.memGb
+
+    options.schedulerArgList=[]
+    if options.queue is not None :
+        options.schedulerArgList.extend(["-q",options.queue])
+    if options.maxTaskRuntime is not None :
+        options.schedulerArgList.extend(["-l","h_rt="+options.maxTaskRuntime])
+
+    options.resetTasks=[]
+    if options.isRescore :
+        options.resetTasks.append("makeHyGenDir")
+
+    return options
+"""
+
+
+runScript3="""
+def main(pickleConfigFile, primaryConfigSection, workflowClassName) :
+
+    from configureUtil import getConfigWithPrimaryOptions
+
+    runOptions=get_run_options(workflowClassName)
+    flowOptions,configSections=getConfigWithPrimaryOptions(pickleConfigFile,primaryConfigSection)
+
+    # new logs and marker files to assist automated workflow monitoring:
+    warningpath=os.path.join(flowOptions.runDir,"workflow.warning.log.txt")
+    errorpath=os.path.join(flowOptions.runDir,"workflow.error.log.txt")
+    exitpath=os.path.join(flowOptions.runDir,"workflow.exitcode.txt")
+
+    # the exit path should only exist once the workflow completes:
+    if os.path.exists(exitpath) :
+        if not os.path.isfile(exitpath) :
+            raise Exception("Unexpected filesystem item: '%s'" % (exitpath))
+        os.unlink(exitpath)
+
+    wflow = workflowClassName(flowOptions,configSections)
+
+    retval=1
+    try:
+        retval=wflow.run(mode=runOptions.mode,
+                         nCores=runOptions.jobs,
+                         memMb=runOptions.memMb,
+                         dataDirRoot=flowOptions.workDir,
+                         mailTo=runOptions.mailTo,
+                         isContinue="Auto",
+                         isForceContinue=True,
+                         isDryRun=runOptions.isDryRun,
+                         isQuiet=runOptions.isQuiet,
+                         schedulerArgList=runOptions.schedulerArgList,
+                         resetTasks=runOptions.resetTasks,
+                         successMsg=wflow.getSuccessMessage(),
+                         warningLogFile=warningpath,
+                         errorLogFile=errorpath)
+    finally:
+        exitfp=open(exitpath,"w")
+        exitfp.write("%i\\n" % (retval))
+        exitfp.close()
+
+    sys.exit(retval)
+"""
diff --git a/src/python/lib/mantaOptions.py b/src/python/lib/mantaOptions.py
new file mode 100644
index 0000000..41281dc
--- /dev/null
+++ b/src/python/lib/mantaOptions.py
@@ -0,0 +1,164 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Workflow configuration options shared by multiple
+configuration scripts.
+"""
+
+import os,sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+scriptName=os.path.basename(__file__)
+
+sys.path.append(scriptDir)
+
+from configureOptions import ConfigureWorkflowOptions
+from configureUtil import assertOptionExists, joinFile, OptParseException, validateFixExistingDirArg, validateFixExistingFileArg
+from workflowUtil import exeFile, parseGenomeRegion
+
+
+
+def cleanLocals(locals_dict) :
+    """
+    Return a copy of a locals() dictionary without "self" or any hidden ("__"-prefixed) keys
+    """
+    def isVisible(key) : return (not key.startswith("__")) and (key != "self")
+    return dict((key,val) for (key,val) in locals_dict.items() if isVisible(key))
+
+
+
+class MantaWorkflowOptionsBase(ConfigureWorkflowOptions) :
+    """workflow configuration options shared by multiple configuration scripts"""
+    validAlignerModes = ["bwa","isaac"]
+
+    def addWorkflowGroupOptions(self,group) :
+        group.add_option("--referenceFasta",type="string",metavar="FILE",
+                         help="samtools-indexed reference fasta file [required]")
+        group.add_option("--runDir", type="string",metavar="DIR",
+                         help="Run script and run output will be written to this directory [required] (default: %default)")
+
+    def addExtendedGroupOptions(self,group) :
+        group.add_option("--scanSizeMb", type="int", metavar="INT",
+                         help="Maximum sequence region size (in megabases) scanned by each task during "
+                         "SV Locus graph generation. (default: %default)")
+        group.add_option("--region", type="string",dest="regionStrList",metavar="REGION", action="append",
+                         help="Limit the analysis to a region of the genome for debugging purposes. "
+                              "If this argument is provided multiple times all specified regions will "
+                              "be analyzed together. All regions must be non-overlapping to get a "
+                              "meaningful result. Examples: '--region chr20' (whole chromosome), "
+                              "'--region chr2:100-2000 --region chr3:2500-3000' (two regions)")
+
+        ConfigureWorkflowOptions.addExtendedGroupOptions(self,group)
+
+
+    def getOptionDefaults(self) :
+        """
+        Set option defaults.
+
+        Every local variable in this method becomes part of the default hash
+        """
+
+        configCommandLine=sys.argv
+
+        alignerMode = "isaac"
+
+        libexecDir=os.path.abspath(os.path.join(scriptDir,"@THIS_RELATIVE_LIBEXECDIR@"))
+        assert os.path.isdir(libexecDir)
+
+        bgzipBin=joinFile(libexecDir,exeFile("bgzip"))
+        htsfileBin=joinFile(libexecDir,exeFile("htsfile"))
+        tabixBin=joinFile(libexecDir,exeFile("tabix"))
+        samtoolsBin=joinFile(libexecDir,exeFile("samtools"))
+
+        mantaStatsBin=joinFile(libexecDir,exeFile("GetAlignmentStats"))
+        mantaMergeStatsBin=joinFile(libexecDir,exeFile("MergeAlignmentStats"))
+        getChromDepthBin=joinFile(libexecDir,exeFile("GetChromDepth"))
+        mantaGraphBin=joinFile(libexecDir,exeFile("EstimateSVLoci"))
+        mantaGraphMergeBin=joinFile(libexecDir,exeFile("MergeSVLoci"))
+        mantaStatsMergeBin=joinFile(libexecDir,exeFile("MergeEdgeStats"))
+        mantaGraphCheckBin=joinFile(libexecDir,exeFile("CheckSVLoci"))
+        mantaHyGenBin=joinFile(libexecDir,exeFile("GenerateSVCandidates"))
+        mantaGraphStatsBin=joinFile(libexecDir,exeFile("SummarizeSVLoci"))
+        mantaStatsSummaryBin=joinFile(libexecDir,exeFile("SummarizeAlignmentStats"))
+
+        mergeChromDepth=joinFile(libexecDir,"mergeChromDepth.py")
+        mantaSortVcf=joinFile(libexecDir,"sortVcf.py")
+        mantaExtraSmallVcf=joinFile(libexecDir,"extractSmallIndelCandidates.py")
+        mantaPloidyFilter=joinFile(libexecDir,"ploidyFilter.py")
+        mantaSortEdgeLogs=joinFile(libexecDir,"sortEdgeLogs.py")
+        catScript=joinFile(libexecDir,"cat.py")
+        vcfCmdlineSwapper=joinFile(libexecDir,"vcfCmdlineSwapper.py")
+        mantaSortBam=joinFile(libexecDir,"sortBam.py")
+        mantaMergeBam=joinFile(libexecDir,"mergeBam.py")
+        mantaFilterBam=joinFile(libexecDir,"filterBam.py")
+
+        # default memory request per process-type
+        #
+        # where different values are provided for SGE and local runs note:
+        #  1. for SGE the memory limits must be greater than the highest memory use ever
+        #      expected in a production run. The consequence of exceeding this limit is a failed
+        #      run.
+        #   2. for localhost the memory usage should be at least above the highest mean memory
+        #       use ever expected in a production run. The consequence of exceeding the mean is
+        #       a slow run due to swapping.
+        #
+        estimateMemMb=2*1024
+        mergeMemMb=4*1024
+        hyGenSGEMemMb=4*1024
+        hyGenLocalMemMb=2*1024
+
+        scanSizeMb = 12
+
+        return cleanLocals(locals())
+
+
+
+    def validateAndSanitizeExistingOptions(self,options) :
+        """normalize options in place: abs runDir, checked alignerMode, reference + .fai index, parsed regions"""
+        options.runDir=os.path.abspath(options.runDir)
+
+        # check alignerMode:
+        if options.alignerMode is not None :
+            options.alignerMode = options.alignerMode.lower()
+            if options.alignerMode not in self.validAlignerModes :
+                raise OptParseException("Invalid aligner mode: '%s'" % options.alignerMode)
+
+        options.referenceFasta=validateFixExistingFileArg(options.referenceFasta,"reference")
+
+        # check for reference fasta index file:
+        if options.referenceFasta is not None :
+            faiFile=options.referenceFasta + ".fai"
+            if not os.path.isfile(faiFile) :
+                raise OptParseException("Can't find expected fasta index file: '%s'" % (faiFile))
+
+        if (options.regionStrList is None) or (len(options.regionStrList) == 0) :
+            options.genomeRegionList = None
+        else :
+            options.genomeRegionList = [parseGenomeRegion(r) for r in options.regionStrList]
+
+
+    def validateOptionExistence(self,options) :
+        """require the run directory, aligner mode and reference fasta options to be set"""
+        assertOptionExists(options.runDir,"run directory")
+
+        assertOptionExists(options.alignerMode,"aligner mode")
+        assertOptionExists(options.referenceFasta,"reference fasta file")
+
+
diff --git a/src/python/lib/mantaWorkflow.py b/src/python/lib/mantaWorkflow.py
new file mode 100644
index 0000000..42919a1
--- /dev/null
+++ b/src/python/lib/mantaWorkflow.py
@@ -0,0 +1,757 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Manta SV discovery workflow
+"""
+
+
+import os.path
+import shutil
+import sys
+
+# add script path to pull in utils in same directory:
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(os.path.abspath(scriptDir))
+
+# add pyflow path:
+pyflowDir=os.path.join(scriptDir,"pyflow")
+sys.path.append(os.path.abspath(pyflowDir))
+
+from configBuildTimeInfo import workflowVersion
+from configureUtil import getIniSections,dumpIniSections
+from pyflow import WorkflowRunner
+from sharedWorkflow import getMkdirCmd, getMvCmd, getRmCmd, getRmdirCmd, \
+                           quoteStringList, runDepthFromAlignments
+from workflowUtil import checkFile, ensureDir, isWindows, preJoin, which, \
+                         getNextGenomeSegment, getFastaChromOrderSize, cleanPyEnv
+
+
+__version__ = workflowVersion
+
+
+
+def runStats(self,taskPrefix="",dependencies=None) :
+
+    statsPath=self.paths.getStatsPath()
+    statsFilename=os.path.basename(statsPath)
+
+    tmpStatsDir=statsPath+".tmpdir"
+
+    makeTmpStatsDirCmd = getMkdirCmd() + [tmpStatsDir]
+    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpStatsDirCmd, dependencies=dependencies, isForceLocal=True)
+
+    tmpStatsFiles = []
+    statsTasks = set()
+
+    for (bamIndex,bamPath) in enumerate(self.params.normalBamList + self.params.tumorBamList) :
+        indexStr = str(bamIndex).zfill(3)
+        tmpStatsFiles.append(os.path.join(tmpStatsDir,statsFilename+"."+ indexStr +".xml"))
+
+        cmd = [ self.params.mantaStatsBin ]
+        cmd.extend(["--output-file",tmpStatsFiles[-1]])
+        cmd.extend(["--align-file",bamPath])
+
+        statsTasks.add(self.addTask(preJoin(taskPrefix,"generateStats_"+indexStr),cmd,dependencies=dirTask))
+
+    cmd = [ self.params.mantaMergeStatsBin ]
+    cmd.extend(["--output-file",statsPath])
+    for tmpStatsFile in tmpStatsFiles :
+        cmd.extend(["--align-stats-file",tmpStatsFile])
+
+    mergeTask = self.addTask(preJoin(taskPrefix,"mergeStats"),cmd,dependencies=statsTasks,isForceLocal=True)
+
+    nextStepWait = set()
+    nextStepWait.add(mergeTask)
+
+    if not self.params.isRetainTempFiles :
+        rmStatsTmpCmd = getRmdirCmd() + [tmpStatsDir]
+        rmTask=self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmStatsTmpCmd,dependencies=mergeTask, isForceLocal=True)
+
+    # summarize stats in format that's easier for human review
+    cmd = [self.params.mantaStatsSummaryBin]
+    cmd.extend(["--align-stats ", statsPath])
+    cmd.extend(["--output-file", self.paths.getStatsSummaryPath()])
+    self.addTask(preJoin(taskPrefix,"summarizeStats"),cmd,dependencies=mergeTask)
+
+    return nextStepWait
+
+
+
+def mantaRunDepthFromAlignments(self,taskPrefix="getChromDepth",dependencies=None):
+    bamList=[]
+    if len(self.params.normalBamList) :
+        bamList = self.params.normalBamList
+    elif len(self.params.tumorBamList) :
+        bamList = self.params.tumorBamList
+    else :
+        return set()
+
+    outputPath=self.paths.getChromDepth()
+    return runDepthFromAlignments(self, bamList, outputPath, taskPrefix, dependencies)
+
+
+
+def runLocusGraph(self,taskPrefix="",dependencies=None):
+    """
+    Create the full SV locus graph
+    """
+
+    statsPath=self.paths.getStatsPath()
+    graphPath=self.paths.getGraphPath()
+    graphStatsPath=self.paths.getGraphStatsPath()
+
+    graphFilename=os.path.basename(graphPath)
+
+    tmpGraphDir=self.paths.getTmpGraphDir()
+
+    makeTmpGraphDirCmd = getMkdirCmd() + [tmpGraphDir]
+    dirTask = self.addTask(preJoin(taskPrefix,"makeGraphTmpDir"), makeTmpGraphDirCmd, dependencies=dependencies, isForceLocal=True)
+
+    tmpGraphFiles = []
+    graphTasks = set()
+
+    def getGenomeSegmentGroups(params) :
+        """
+        Iterate segment groups and 'clump' small contigs together
+        """
+
+        minSegmentGroupSize=200000
+        group = []
+        headSize = 0
+        for gseg in getNextGenomeSegment(self.params) :
+            if headSize+gseg.size() <= minSegmentGroupSize :
+                group.append(gseg)
+                headSize += gseg.size()
+            else :
+                if len(group) != 0 : yield(group)
+                group = [gseg]
+                headSize = gseg.size()
+        if len(group) != 0 : yield(group)
+
+    for gsegGroup in getGenomeSegmentGroups(self.params) :
+        assert(len(gsegGroup) != 0)
+        gid=gsegGroup[0].id
+        if len(gsegGroup) > 1 :
+            gid += "_to_"+gsegGroup[-1].id
+        tmpGraphFiles.append(self.paths.getTmpGraphFile(gid))
+        graphCmd = [ self.params.mantaGraphBin ]
+        graphCmd.extend(["--output-file", tmpGraphFiles[-1]])
+        graphCmd.extend(["--align-stats",statsPath])
+        for gseg in gsegGroup :
+            graphCmd.extend(["--region",gseg.bamRegion])
+        graphCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
+        graphCmd.extend(["--min-edge-observations", self.params.minEdgeObservations])
+        graphCmd.extend(["--ref",self.params.referenceFasta])
+        for bamPath in self.params.normalBamList :
+            graphCmd.extend(["--align-file",bamPath])
+        for bamPath in self.params.tumorBamList :
+            graphCmd.extend(["--tumor-align-file",bamPath])
+
+        if self.params.isHighDepthFilter :
+            graphCmd.extend(["--chrom-depth", self.paths.getChromDepth()])
+
+        if self.params.isIgnoreAnomProperPair :
+            graphCmd.append("--ignore-anom-proper-pair")
+        if self.params.isRNA :
+            graphCmd.append("--rna")
+
+        graphTask=preJoin(taskPrefix,"makeLocusGraph_"+gid)
+        graphTasks.add(self.addTask(graphTask,graphCmd,dependencies=dirTask,memMb=self.params.estimateMemMb))
+
+    if len(tmpGraphFiles) == 0 :
+        raise Exception("No SV Locus graphs to create. Possible target region parse error.")
+
+    tmpGraphFileList = self.paths.getTmpGraphFileListPath()
+    tmpGraphFileListTask = preJoin(taskPrefix,"mergeLocusGraphInputList")
+    self.addWorkflowTask(tmpGraphFileListTask,listFileWorkflow(tmpGraphFileList,tmpGraphFiles),dependencies=graphTasks)
+
+    mergeCmd = [ self.params.mantaGraphMergeBin ]
+    mergeCmd.extend(["--output-file", graphPath])
+    mergeCmd.extend(["--graph-file-list",tmpGraphFileList])
+    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"),mergeCmd,dependencies=tmpGraphFileListTask,memMb=self.params.mergeMemMb)
+
+    # Run a separate process to rigorously check that the final graph is valid, the sv candidate generators will check as well, but
+    # this makes the check much more clear:
+
+    checkCmd = [ self.params.mantaGraphCheckBin ]
+    checkCmd.extend(["--graph-file", graphPath])
+    checkTask = self.addTask(preJoin(taskPrefix,"checkLocusGraph"),checkCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)
+
+    if not self.params.isRetainTempFiles :
+        rmGraphTmpCmd = getRmdirCmd() + [tmpGraphDir]
+        rmTask=self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmGraphTmpCmd,dependencies=mergeTask)
+
+    graphStatsCmd  = [self.params.mantaGraphStatsBin,"--global"]
+    graphStatsCmd.extend(["--graph-file",graphPath])
+    graphStatsCmd.extend(["--output-file",graphStatsPath])
+
+    graphStatsTask = self.addTask(preJoin(taskPrefix,"locusGraphStats"),graphStatsCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)
+
+    nextStepWait = set()
+    nextStepWait.add(checkTask)
+    return nextStepWait
+
+
+
+class listFileWorkflow(WorkflowRunner) :
+    """
+    creates a file which enumerates the values in a list, one line per item
+    """
+
+    def __init__(self2, listFile, listItems) :
+        self2.listFile = listFile
+        self2.listItems = listItems
+
+    def workflow(self2) :
+        fp = open(self2.listFile, "w")
+        for listItem in self2.listItems :
+            fp.write(listItem+"\n")
+
+
+
+def sortBams(self, sortBamTasks, taskPrefix="", binStr="", isNormal=True, bamIdx=0, dependencies=None):
+
+    if isNormal:
+        bamList = self.params.normalBamList
+    else:
+        bamList = self.params.tumorBamList
+
+    for bamPath in bamList:
+        supportBam = self.paths.getSupportBamPath(bamIdx, binStr)
+        sortedBam = os.path.splitext(self.paths.getSortedSupportBamPath(bamIdx, binStr))[0]
+        # first check the existence of the supporting bam
+        # then sort the bam only if it exists
+        sortBamCmd = [ sys.executable,"-E", self.params.mantaSortBam,
+                      self.params.samtoolsBin, supportBam, sortedBam ]
+
+        sortBamTask = preJoin(taskPrefix, "sortEvidenceBam_%s_%s" % (binStr, bamIdx))
+        sortBamTasks.add(self.addTask(sortBamTask, sortBamCmd, dependencies=dependencies))
+        bamIdx += 1
+
+    return bamIdx
+
+
+def sortAllVcfs(self, taskPrefix="", dependencies=None) :
+    """sort/prep final vcf outputs"""
+
+    nextStepWait = set()
+
+    def getVcfSortCmd(vcfListFile, outPath, isDiploid) :
+        cmd  = "\"%s\" -E \"%s\" -u " % (sys.executable, self.params.mantaSortVcf)
+        cmd += "-f \"%s\"" % (vcfListFile)
+
+        # apply the ploidy filter to diploid variants
+        if isDiploid:
+            tempVcf = self.paths.getTempDiploidPath()
+            cmd += " > \"%s\"" % (tempVcf)
+            cmd += " && \"%s\" -E \"%s\" \"%s\"" % (sys.executable, self.params.mantaPloidyFilter, tempVcf)
+
+        cmd += " | \"%s\" -c > \"%s\"" % (self.params.bgzipBin, outPath)
+
+        if isDiploid:
+            cmd += " && " + " ".join(getRmCmd()) + " \"%s\"" % (self.paths.getTempDiploidPath())
+        return cmd
+
+    def getVcfTabixCmd(vcfPath) :
+        return [self.params.tabixBin,"-f","-p","vcf", vcfPath]
+
+
+    def sortVcfs(pathList, outPath, label, isDiploid=False) :
+        if len(pathList) == 0 : return set()
+
+        # make header modifications to first vcf in list of files to be sorted:
+        headerFixTask=preJoin(taskPrefix,"fixVcfHeader_"+label)
+        def getHeaderFixCmd(fileName) :
+            tmpName=fileName+".reheader.tmp"
+            cmd  = "\"%s\" -E \"%s\"" % (sys.executable, self.params.vcfCmdlineSwapper)
+            cmd += ' "' + " ".join(self.params.configCommandLine) + '"'
+            cmd += " < \"%s\" > \"%s\"" % (fileName,tmpName)
+            cmd += " && " + " ".join(getMvCmd()) +  " \"%s\" \"%s\"" % (tmpName, fileName)
+            return cmd
+
+        self.addTask(headerFixTask, getHeaderFixCmd(pathList[0]), dependencies=dependencies, isForceLocal=True)
+
+        vcfListFile = self.paths.getVcfListPath(label)
+        inputVcfTask = self.addWorkflowTask(preJoin(taskPrefix,label+"InputList"),listFileWorkflow(vcfListFile,pathList),dependencies=headerFixTask)
+
+        sortCmd = getVcfSortCmd(vcfListFile, outPath, isDiploid)
+        sortTask=self.addTask(preJoin(taskPrefix,"sort_"+label),sortCmd,dependencies=inputVcfTask)
+
+        nextStepWait.add(self.addTask(preJoin(taskPrefix,"tabix_"+label),getVcfTabixCmd(outPath),dependencies=sortTask,isForceLocal=True))
+        return sortTask
+
+
+    candSortTask = sortVcfs(self.candidateVcfPaths,
+                            self.paths.getSortedCandidatePath(),
+                            "sortCandidateSV")
+    sortVcfs(self.diploidVcfPaths,
+             self.paths.getSortedDiploidPath(),
+             "sortDiploidSV",
+             isDiploid=True)
+    sortVcfs(self.somaticVcfPaths,
+             self.paths.getSortedSomaticPath(),
+             "sortSomaticSV")
+    sortVcfs(self.tumorVcfPaths,
+             self.paths.getSortedTumorPath(),
+             "sortTumorSV")
+
+    def getExtractSmallCmd(maxSize, inPath, outPath) :
+        cmd  = "\"%s\" -dc \"%s\"" % (self.params.bgzipBin, inPath)
+        cmd += " | \"%s\" -E \"%s\" --maxSize %i" % (sys.executable, self.params.mantaExtraSmallVcf, maxSize)
+        cmd += " | \"%s\" -c > \"%s\"" % (self.params.bgzipBin, outPath)
+        return cmd
+
+    def extractSmall(inPath, outPath) :
+        maxSize = int(self.params.minScoredVariantSize) - 1
+        if maxSize < 1 : return
+        smallCmd = getExtractSmallCmd(maxSize, inPath, outPath)
+        smallTask=self.addTask(preJoin(taskPrefix,"extractSmallIndels"), smallCmd, dependencies=candSortTask, isForceLocal=True)
+        nextStepWait.add(self.addTask(smallTask+"_tabix", getVcfTabixCmd(outPath), dependencies=smallTask, isForceLocal=True))
+
+    extractSmall(self.paths.getSortedCandidatePath(), self.paths.getSortedCandidateSmallIndelsPath())
+
+    return nextStepWait
+
+
+def mergeSupportBams(self, mergeBamTasks, taskPrefix="", isNormal=True, bamIdx=0, dependencies=None) :
+
+    if isNormal:
+        bamList = self.params.normalBamList
+    else:
+        bamList = self.params.tumorBamList
+
+    for bamPath in bamList:
+        # merge support bams
+        mergedSamFile = self.paths.getMergedSupportSamPath(bamIdx)
+        mergeCmd = [ sys.executable,"-E", self.params.mantaMergeBam,
+                     self.params.samtoolsBin,
+                     self.paths.getSortedSupportBamMask(bamIdx),
+                     self.paths.getMergedSupportBamPath(bamIdx),
+                     mergedSamFile,
+                     self.paths.getSupportBamListPath(bamIdx) ]
+
+        mergeBamTask=self.addTask(preJoin(taskPrefix,"merge_evidenceBam_%s" % (bamIdx)),
+                                  mergeCmd, dependencies=dependencies)
+        mergeBamTasks.add(mergeBamTask)
+
+        # filter the merged sam
+        filteredBamFile = self.paths.getFinalSupportBamPath(bamPath)
+        filterCmd = [ sys.executable,"-E", self.params.mantaFilterBam,
+                     self.params.samtoolsBin,
+                     self.paths.getSortedCandidatePath(),
+                     mergedSamFile,
+                     self.paths.getfilteredSupportSamPath(bamIdx),
+                     filteredBamFile ]
+        filterBamTask = self.addTask(preJoin(taskPrefix,"filter_evidenceSam_%s" % (bamIdx)),
+                                     filterCmd, dependencies=mergeBamTask)
+        mergeBamTasks.add(filterBamTask)
+
+        # index the filtered bam
+        indexCmd = [ self.params.samtoolsBin, "index", filteredBamFile ]
+        indexBamTask = self.addTask(preJoin(taskPrefix,"index_evidenceBam_%s" % (bamIdx)),
+                                    indexCmd, dependencies=filterBamTask)
+        mergeBamTasks.add(indexBamTask)
+
+        bamIdx += 1
+
+    return bamIdx
+
+
+def runHyGen(self, taskPrefix="", dependencies=None) :
+    """
+    Run hypothesis generation on each SV locus
+    """
+
+    import copy
+
+    statsPath=self.paths.getStatsPath()
+    graphPath=self.paths.getGraphPath()
+    hygenDir=self.paths.getHyGenDir()
+
+    makeHyGenDirCmd = getMkdirCmd() + [hygenDir]
+    dirTask = self.addTask(preJoin(taskPrefix,"makeHyGenDir"), makeHyGenDirCmd, dependencies=dependencies, isForceLocal=True)
+
+    isSomatic = (len(self.params.normalBamList) and len(self.params.tumorBamList))
+    isTumorOnly = ((not isSomatic) and len(self.params.tumorBamList))
+
+    hyGenMemMb = self.params.hyGenLocalMemMb
+    if self.getRunMode() == "sge" :
+        hyGenMemMb = self.params.hyGenSGEMemMb
+
+    hygenTasks=set()
+    if self.params.isGenerateSupportBam :
+        sortBamVcfTasks = set()
+
+    self.candidateVcfPaths = []
+    self.diploidVcfPaths = []
+    self.somaticVcfPaths = []
+    self.tumorVcfPaths = []
+
+    edgeRuntimeLogPaths = []
+    edgeStatsLogPaths = []
+
+    for binId in range(self.params.nonlocalWorkBins) :
+        binStr = str(binId).zfill(4)
+        self.candidateVcfPaths.append(self.paths.getHyGenCandidatePath(binStr))
+        if isTumorOnly :
+            self.tumorVcfPaths.append(self.paths.getHyGenTumorPath(binStr))
+        else:
+            self.diploidVcfPaths.append(self.paths.getHyGenDiploidPath(binStr))
+            if isSomatic :
+                self.somaticVcfPaths.append(self.paths.getHyGenSomaticPath(binStr))
+
+        hygenCmd = [ self.params.mantaHyGenBin ]
+        hygenCmd.extend(["--align-stats",statsPath])
+        hygenCmd.extend(["--graph-file",graphPath])
+        hygenCmd.extend(["--bin-index", str(binId)])
+        hygenCmd.extend(["--bin-count", str(self.params.nonlocalWorkBins)])
+        hygenCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
+        hygenCmd.extend(["--min-candidate-spanning-count", self.params.minCandidateSpanningCount])
+        hygenCmd.extend(["--min-scored-sv-size", self.params.minScoredVariantSize])
+        hygenCmd.extend(["--ref",self.params.referenceFasta])
+        hygenCmd.extend(["--candidate-output-file", self.candidateVcfPaths[-1]])
+
+        # tumor-only mode
+        if isTumorOnly :
+            hygenCmd.extend(["--tumor-output-file", self.tumorVcfPaths[-1]])
+        else:
+            hygenCmd.extend(["--diploid-output-file", self.diploidVcfPaths[-1]])
+            hygenCmd.extend(["--min-qual-score", self.params.minDiploidVariantScore])
+            hygenCmd.extend(["--min-pass-qual-score", self.params.minPassDiploidVariantScore])
+            hygenCmd.extend(["--min-pass-gt-score", self.params.minPassDiploidGTScore])
+            # tumor/normal mode
+            if isSomatic :
+                hygenCmd.extend(["--somatic-output-file", self.somaticVcfPaths[-1]])
+                hygenCmd.extend(["--min-somatic-score", self.params.minSomaticScore])
+                hygenCmd.extend(["--min-pass-somatic-score", self.params.minPassSomaticScore])
+
+                # temporary fix for FFPE:
+                hygenCmd.append("--skip-remote-reads")
+
+        if self.params.isHighDepthFilter :
+            hygenCmd.extend(["--chrom-depth", self.paths.getChromDepth()])
+
+        edgeRuntimeLogPaths.append(self.paths.getHyGenEdgeRuntimeLogPath(binStr))
+        hygenCmd.extend(["--edge-runtime-log", edgeRuntimeLogPaths[-1]])
+
+        edgeStatsLogPaths.append(self.paths.getHyGenEdgeStatsPath(binStr))
+        hygenCmd.extend(["--edge-stats-log", edgeStatsLogPaths[-1]])
+
+        if self.params.isGenerateSupportBam :
+            hygenCmd.extend(["--evidence-bam-stub", self.paths.getSupportBamStub(binStr)])
+
+        for bamPath in self.params.normalBamList :
+            hygenCmd.extend(["--align-file", bamPath])
+        for bamPath in self.params.tumorBamList :
+            hygenCmd.extend(["--tumor-align-file", bamPath])
+
+        if self.params.isIgnoreAnomProperPair :
+            hygenCmd.append("--ignore-anom-proper-pair")
+        if self.params.isRNA :
+            hygenCmd.append("--rna")
+        if self.params.isUnstrandedRNA :
+            hygenCmd.append("--unstranded")
+
+        hygenTask = preJoin(taskPrefix,"generateCandidateSV_"+binStr)
+        hygenTasks.add(self.addTask(hygenTask,hygenCmd,dependencies=dirTask, memMb=hyGenMemMb))
+
+        # TODO: if the bam is large, for efficiency, consider
+        # 1) filtering the bin-specific bam first w.r.t. the final candidate vcf
+        # 2) then sort the bin-specific bam and merge them
+        # This would require moving the filter/sort bam jobs outside the hygen loop
+        if self.params.isGenerateSupportBam :
+            bamIndex = 0
+            # sort supporting bams extracted from normal samples
+            bamIndex  = sortBams(self, sortBamVcfTasks,
+                                 taskPrefix=taskPrefix, binStr=binStr,
+                                 isNormal=True, bamIdx=bamIndex,
+                                 dependencies=hygenTask)
+            # sort supporting bams extracted from tumor samples
+            bamIndex = sortBams(self, sortBamVcfTasks,
+                                taskPrefix=taskPrefix, binStr=binStr,
+                                isNormal=False, bamIdx=bamIndex,
+                                dependencies=hygenTask)
+
+    vcfTasks = sortAllVcfs(self,taskPrefix=taskPrefix,dependencies=hygenTasks)
+    nextStepWait = copy.deepcopy(hygenTasks)
+
+    if self.params.isGenerateSupportBam :
+        sortBamVcfTasks.union(vcfTasks)
+        mergeBamTasks = set()
+        bamCount = 0
+        # merge supporting bams for each normal sample
+        bamCount = mergeSupportBams(self, mergeBamTasks, taskPrefix=taskPrefix,
+                                    isNormal=True, bamIdx=bamCount,
+                                    dependencies=sortBamVcfTasks)
+
+        # merge supporting bams for each tumor sample
+        bamCount = mergeSupportBams(self, mergeBamTasks, taskPrefix=taskPrefix,
+                                    isNormal=False, bamIdx=bamCount,
+                                    dependencies=sortBamVcfTasks)
+
+        nextStepWait = nextStepWait.union(sortBamVcfTasks)
+        nextStepWait = nextStepWait.union(mergeBamTasks)
+
+    #
+    # sort the edge runtime logs
+    #
+    logListFile = self.paths.getEdgeRuntimeLogListPath()
+    logListTask = preJoin(taskPrefix,"sortEdgeRuntimeLogsInputList")
+    self.addWorkflowTask(logListTask,listFileWorkflow(logListFile,edgeRuntimeLogPaths),dependencies=hygenTasks)
+
+    def getEdgeLogSortCmd(logListFile, outPath) :
+        cmd  = [sys.executable,"-E",self.params.mantaSortEdgeLogs,"-f", logListFile,"-o",outPath]
+        return cmd
+
+    edgeSortCmd=getEdgeLogSortCmd(logListFile,self.paths.getSortedEdgeRuntimeLogPath())
+    self.addTask(preJoin(taskPrefix,"sortEdgeRuntimeLogs"), edgeSortCmd, dependencies=logListTask, isForceLocal=True)
+
+    #
+    # merge all edge stats
+    #
+    statsFileList = self.paths.getStatsFileListPath()
+    statsListTask = preJoin(taskPrefix,"mergeEdgeStatsInputList")
+    self.addWorkflowTask(statsListTask,listFileWorkflow(statsFileList,edgeStatsLogPaths),dependencies=hygenTasks)
+
+    edgeStatsMergeTask=preJoin(taskPrefix,"mergeEdgeStats")
+    edgeStatsMergeCmd=[self.params.mantaStatsMergeBin]
+    edgeStatsMergeCmd.extend(["--stats-file-list",statsFileList])
+    edgeStatsMergeCmd.extend(["--output-file",self.paths.getFinalEdgeStatsPath()])
+    edgeStatsMergeCmd.extend(["--report-file",self.paths.getFinalEdgeStatsReportPath()])
+    self.addTask(edgeStatsMergeTask, edgeStatsMergeCmd, dependencies=statsListTask, isForceLocal=True)
+
+    if not self.params.isRetainTempFiles :
+        # we could delete the temp hygenDir directory here, but it is used for debug so frequently it doesn't seem worth it at present.
+        # rmDirCmd = getRmdirCmd() + [hygenDir]
+        # rmDirTask=self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmDirCmd,dependencies=TBD_XXX_MANY)
+        pass
+
+    return nextStepWait
+
+
+
+class PathInfo:
+    """
+    object to centralize shared workflow path names
+    """
+
+    def __init__(self, params) :
+        self.params = params
+
+    def getStatsPath(self) :
+        return os.path.join(self.params.workDir,"alignmentStats.xml")
+
+    def getStatsSummaryPath(self) :
+        return os.path.join(self.params.statsDir,"alignmentStatsSummary.txt")
+
+    def getChromDepth(self) :
+        return os.path.join(self.params.workDir,"chromDepth.txt")
+
+    def getGraphPath(self) :
+        return os.path.join(self.params.workDir,"svLocusGraph.bin")
+
+    def getTmpGraphDir(self) :
+        return os.path.join(self.getGraphPath()+".tmpdir")
+
+    def getTmpGraphFile(self, gid) :
+        return os.path.join(self.getTmpGraphDir(),"svLocusGraph.%s.bin" % (gid))
+
+    def getHyGenDir(self) :
+        return os.path.join(self.params.workDir,"svHyGen")
+
+    def getHyGenCandidatePath(self, binStr) :
+        return os.path.join(self.getHyGenDir(),"candidateSV.%s.vcf" % (binStr))
+
+    def getSortedCandidatePath(self) :
+        return os.path.join(self.params.variantsDir,"candidateSV.vcf.gz")
+
+    def getSortedCandidateSmallIndelsPath(self) :
+        return os.path.join(self.params.variantsDir,"candidateSmallIndels.vcf.gz")
+
+    def getHyGenDiploidPath(self, binStr) :
+        return os.path.join(self.getHyGenDir(),"diploidSV.%s.vcf" % (binStr))
+
+    def getTempDiploidPath(self) :
+        return os.path.join(self.getHyGenDir(),"diploidSV.vcf.temp")
+
+    def getSortedDiploidPath(self) :
+        return os.path.join(self.params.variantsDir,"diploidSV.vcf.gz")
+
+    def getHyGenSomaticPath(self, binStr) :
+        return os.path.join(self.getHyGenDir(),"somaticSV.%s.vcf" % (binStr))
+
+    def getHyGenTumorPath(self, binStr) :
+        return os.path.join(self.getHyGenDir(),"tumorSV.%s.vcf" % (binStr))
+
+    def getSortedSomaticPath(self) :
+        return os.path.join(self.params.variantsDir,"somaticSV.vcf.gz")
+
+    def getSortedTumorPath(self) :
+        return os.path.join(self.params.variantsDir,"tumorSV.vcf.gz")
+
+    def getHyGenEdgeRuntimeLogPath(self, binStr) :
+        return os.path.join(self.getHyGenDir(),"edgeRuntimeLog.%s.txt" % (binStr))
+
+    def getHyGenEdgeStatsPath(self, binStr) :
+        return os.path.join(self.getHyGenDir(),"edgeStats.%s.xml" % (binStr))
+
+    def getSortedEdgeRuntimeLogPath(self) :
+        return os.path.join(self.params.workDir,"edgeRuntimeLog.txt")
+
+    def getFinalEdgeStatsPath(self) :
+        return os.path.join(self.params.statsDir,"svCandidateGenerationStats.xml")
+
+    def getFinalEdgeStatsReportPath(self) :
+        return os.path.join(self.params.statsDir,"svCandidateGenerationStats.tsv")
+
+    def getGraphStatsPath(self) :
+        return os.path.join(self.params.statsDir,"svLocusGraphStats.tsv")
+
+
+    def getSupportBamPath(self, bamIdx, binStr):
+        return os.path.join(self.getHyGenDir(),
+                            "evidence_%s.bam_%s.bam" % (binStr, bamIdx))
+
+    def getSupportBamStub(self, binStr):
+        return os.path.join(self.getHyGenDir(),
+                            "evidence_%s" % (binStr))
+
+    def getSortedSupportBamPath(self, bamIdx, binStr):
+        return os.path.join(self.getHyGenDir(),
+                            "evidence_%s.bam_%s.sorted.bam" % (binStr, bamIdx))
+
+    def getSortedSupportBamMask(self, bamIdx):
+        return os.path.join(self.getHyGenDir(),
+                            "evidence_*.bam_%s.sorted.bam" % (bamIdx))
+
+    def getMergedSupportBamPath(self, bamIdx):
+        return os.path.join(self.getHyGenDir(),
+                            "evidence.bam_%s.merged.bam" % (bamIdx))
+
+    def getMergedSupportSamPath(self, bamIdx):
+        return os.path.join(self.getHyGenDir(),
+                            "evidence.bam_%s.merged.sam" % (bamIdx))
+
+    def getfilteredSupportSamPath(self, bamIdx):
+        return os.path.join(self.getHyGenDir(),
+                            "evidence.bam_%s.filtered.sam" % (bamIdx))
+
+    def getFinalSupportBamPath(self, bamPath):
+        bamPrefix = os.path.splitext(os.path.basename(bamPath))[0]
+        return os.path.join(self.params.evidenceDir,
+                            "evidence.%s.bam" % (bamPrefix))
+
+    def getSupportBamListPath(self, bamIdx):
+        return os.path.join(self.getHyGenDir(),
+                            "list.evidence.bam_%s.txt" % (bamIdx))
+
+    def getTmpGraphFileListPath(self) :
+        return os.path.join(self.getTmpGraphDir(),"list.svLocusGraph.txt")
+
+    def getVcfListPath(self, label) :
+        return os.path.join(self.getHyGenDir(),"list.%s.txt" % (label))
+
+    def getEdgeRuntimeLogListPath(self) :
+        return os.path.join(self.getHyGenDir(),"list.edgeRuntimeLog.txt")
+
+    def getStatsFileListPath(self) :
+        return os.path.join(self.getHyGenDir(),"list.edgeStats.txt")
+
+
+
+class MantaWorkflow(WorkflowRunner) :
+    """
+    Manta SV discovery workflow
+    """
+
+    def __init__(self,params,iniSections) :
+
+        cleanPyEnv()
+
+        self.params=params
+        self.iniSections=iniSections
+
+        # format bam lists:
+        if self.params.normalBamList is None : self.params.normalBamList = []
+        if self.params.tumorBamList is None : self.params.tumorBamList = []
+
+        # make sure run directory is setup:
+        self.params.runDir=os.path.abspath(self.params.runDir)
+        ensureDir(self.params.runDir)
+
+        # everything that's not intended to be a final result should dump directories/files in workDir
+        self.params.workDir=os.path.join(self.params.runDir,"workspace")
+        ensureDir(self.params.workDir)
+
+        # all finalized pretty results get transfered to resultsDir
+        self.params.resultsDir=os.path.join(self.params.runDir,"results")
+        ensureDir(self.params.resultsDir)
+        self.params.statsDir=os.path.join(self.params.resultsDir,"stats")
+        ensureDir(self.params.statsDir)
+        self.params.variantsDir=os.path.join(self.params.resultsDir,"variants")
+        ensureDir(self.params.variantsDir)
+        self.params.evidenceDir=os.path.join(self.params.resultsDir,"evidence")
+        ensureDir(self.params.evidenceDir)
+#         self.params.reportsDir=os.path.join(self.params.resultsDir,"reports")
+#         ensureDir(self.params.reportsDir)
+
+        indexRefFasta=self.params.referenceFasta+".fai"
+
+        if self.params.referenceFasta is None:
+            raise Exception("No reference fasta defined.")
+        else:
+            checkFile(self.params.referenceFasta,"reference fasta")
+            checkFile(indexRefFasta,"reference fasta index")
+
+        # read fasta index
+        (self.params.chromOrder,self.params.chromSizes) = getFastaChromOrderSize(indexRefFasta)
+
+        self.paths = PathInfo(self.params)
+
+        self.params.isHighDepthFilter = (not (self.params.isExome or self.params.isRNA))
+        self.params.isIgnoreAnomProperPair = (self.params.isRNA)
+
+
+
+    def getSuccessMessage(self) :
+        "Message to be included in email for successful runs"
+
+        msg  = "Manta workflow successfully completed.\n\n"
+        msg += "\tworkflow version: %s\n" % (__version__)
+        return msg
+
+
+
+    def workflow(self) :
+        self.flowLog("Initiating Manta workflow version: %s" % (__version__))
+
+        graphTaskDependencies = set()
+
+        if not self.params.useExistingAlignStats :
+            statsTasks = runStats(self,taskPrefix="getAlignmentStats")
+            graphTaskDependencies |= statsTasks
+
+        if not ((not self.params.isHighDepthFilter) or self.params.useExistingChromDepths) :
+            depthTasks = mantaRunDepthFromAlignments(self)
+            graphTaskDependencies |= depthTasks
+
+        graphTasks = runLocusGraph(self,dependencies=graphTaskDependencies)
+
+        hygenTasks = runHyGen(self,dependencies=graphTasks)
diff --git a/src/python/lib/sharedWorkflow.py b/src/python/lib/sharedWorkflow.py
new file mode 100644
index 0000000..28a1ff4
--- /dev/null
+++ b/src/python/lib/sharedWorkflow.py
@@ -0,0 +1,191 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Workflow components shared between SV and small variant packages
+"""
+
+
+import os.path
+import sys
+
+# add script path to pull in utils in same directory:
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+sys.path.append(os.path.abspath(scriptDir))
+
+from workflowUtil import isWindows, preJoin, getRobustChromId
+
+
+
+def isString(x):
+    return isinstance(x, basestring)
+
+
+def isIterable(x):
+    return (getattr(x, '__iter__', False) != False)
+
+
+def lister(x):
+    """
+    Convert input into a list, whether it's already iterable or
+    not. Make an exception for individual strings to be returned
+    as a list of one string, instead of being chopped into letters.
+    Also, convert None to an empty list.
+    """
+    # special handling in case a single string is given:
+    if x is None : return []
+    if (isString(x) or (not isIterable(x))) : return [x]
+    return list(x)
+
+
+
+def setzer(x) :
+    """
+    convert user input into a set, handling the pathological case
+    that you have been handed a single string, and you don't want
+    a set of letters:
+    """
+    return set(lister(x))
+
+
+def getMkdirCmd() :
+    if isWindows() :
+        return ["mkdir"]
+    else:
+        return ["mkdir","-p"]
+
+def getRmdirCmd() :
+    if isWindows():
+        return ["rd","/s","/q"]
+    else:
+        return ["rm","-rf"]
+
+def getRmCmd() :
+    if isWindows():
+        return ["del","/f"]
+    else:
+        return ["rm","-f"]
+
+def getMvCmd() :
+    if isWindows():
+        return ["move","/y"]
+    else:
+        return ["mv"]
+
+def quoteStringList(strList):
+    return ["\"%s\"" % (x) for x in strList]
+
+
+
+def _runDepthShared(self,taskPrefix, dependencies, bamList, outputPath, depthFunc) :
+    """
+    estimate chrom depth using the specified depthFunc to compute per-sample depth
+    """
+
+    outputFilename=os.path.basename(outputPath)
+
+    tmpDir=outputPath+".tmpdir"
+    makeTmpDirCmd = getMkdirCmd() + [tmpDir]
+    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)
+
+    tmpFiles = []
+    scatterTasks = set()
+
+    for (bamIndex, bamFile) in enumerate(bamList) :
+        indexStr = str(bamIndex).zfill(3)
+        tmpFiles.append(os.path.join(tmpDir,outputFilename+"."+ indexStr +".txt"))
+        scatterTasks |= setzer(depthFunc(self,taskPrefix+"_sample"+indexStr,dirTask,bamFile,tmpFiles[-1]))
+
+    cmd = [ self.params.mergeChromDepth ]
+    cmd.extend(["--out",outputPath])
+    for tmpFile in tmpFiles :
+        cmd.extend(["--in",tmpFile])
+
+    mergeTask = self.addTask(preJoin(taskPrefix,"mergeChromDepth"),cmd,dependencies=scatterTasks,isForceLocal=True)
+
+    nextStepWait = set()
+    nextStepWait.add(mergeTask)
+
+    if not self.params.isRetainTempFiles :
+        rmTmpCmd = getRmdirCmd() + [tmpDir]
+        rmTask=self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmTmpCmd,dependencies=mergeTask, isForceLocal=True)
+
+    return nextStepWait
+
+
+def runDepthFromAlignments(self, bamList, outputPath, taskPrefix="",dependencies=None) :
+    """
+    estimate chrom depth directly from BAM/CRAM file
+    """
+
+    def depthFunc(self,taskPrefix,dependencies,bamFile,outFile) :
+        outputPath=outFile
+        outputFilename=os.path.basename(outputPath)
+
+        tmpDir=os.path.join(outputPath+".tmpdir")
+        makeTmpDirCmd = getMkdirCmd() + [tmpDir]
+        dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)
+
+        tmpFiles = []
+        scatterTasks = set()
+
+        def getChromosomeGroups(params) :
+            """
+            Iterate through chromosomes/contigs and group small contigs together. This functions as a generator yielding
+            successive contig groups.
+            """
+            minSize=200000
+            group = []
+            headSize = 0
+
+            chromCount = len(params.chromSizes)
+            assert(len(params.chromOrder) == chromCount)
+            for chromIndex in range(chromCount) :
+                chromLabel = params.chromOrder[chromIndex]
+                chromSize = params.chromSizes[chromLabel]
+                if headSize+chromSize <= minSize :
+                    group.append((chromIndex,chromLabel))
+                    headSize += chromSize
+                else :
+                    if len(group) != 0 : yield(group)
+                    group = [(chromIndex,chromLabel)]
+                    headSize = chromSize
+            if len(group) != 0 : yield(group)
+
+        for chromGroup in getChromosomeGroups(self.params) :
+            assert(len(chromGroup) > 0)
+            cid = getRobustChromId(chromGroup[0][0], chromGroup[0][1])
+            if len(chromGroup) > 1 :
+                cid += "_to_"+getRobustChromId(chromGroup[-1][0], chromGroup[-1][1])
+            tmpFiles.append(os.path.join(tmpDir,outputFilename+"_"+cid))
+            cmd = [self.params.getChromDepthBin,"--align-file",bamFile,"--output",tmpFiles[-1]]
+            for (chromIndex,chromLabel) in chromGroup :
+                cmd.extend(["--chrom",chromLabel])
+            scatterTasks.add(self.addTask(preJoin(taskPrefix,"estimateChromDepth_"+cid),cmd,dependencies=dirTask))
+
+        catCmd = [self.params.catScript,"--output",outputPath]+tmpFiles
+        catTask = self.addTask(preJoin(taskPrefix,"catChromDepth"),catCmd,dependencies=scatterTasks, isForceLocal=True)
+
+        nextStepWait = set()
+        nextStepWait.add(catTask)
+
+        return nextStepWait
+
+    return _runDepthShared(self, taskPrefix, dependencies, bamList, outputPath, depthFunc)
+
diff --git a/src/python/lib/workflowUtil.py b/src/python/lib/workflowUtil.py
new file mode 100644
index 0000000..9063027
--- /dev/null
+++ b/src/python/lib/workflowUtil.py
@@ -0,0 +1,378 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+util -- simple utilities shared by bwa/gatk workflow objects
+"""
+
+__author__ = "Chris Saunders"
+
+
+
+import os
+import re
+
+
+
+def ensureDir(d):
+    """
+    make directory if it doesn't already exist, raise exception if something else is in the way:
+    """
+    if os.path.exists(d):
+        if not os.path.isdir(d) :
+            raise Exception("Can't create directory: %s" % (d))
+    else :
+        os.makedirs(d)
+
+
+
+def skipJoin(sep,a,b) :
+    if a == "" : return b
+    elif b == "" : return a
+    return a+sep+b
+
+
+
+def preJoin(a,b) :
+    return skipJoin('_',a,b)
+
+
+
+def checkFile(filename,label="") :
+    if os.path.isfile(filename) : return
+    if label is None : label=""
+    if label != "" : label=" "+label.strip()
+    raise Exception("Can't find%s file '%s'" % (label,filename) )
+
+
+
+def checkDir(dirname,label="") :
+    if os.path.isdir(dirname) : return
+    if label is None : label=""
+    if label != "" : label=" "+label.strip()
+    raise Exception("Can't find%s directory '%s'" % (label,dirname) )
+
+
+
+def which(searchFile) :
+    """
+    search the PATH for searchFile
+
+    result should be similar to that of the *nix 'which' utility
+    """
+    for searchPath in os.environ["PATH"].split(os.pathsep):
+        test=os.path.join(searchPath,searchFile)
+        if os.path.isfile(test): return test
+
+    return None
+
+
+
+def parseGenomeRegion(regionStr) :
+    """
+    parse a samtools region string and return a dict with the keys chrom, start and end
+
+    missing start and end values will be entered as None
+    """
+
+    assert(regionStr is not None)
+
+    word=regionStr.strip().rsplit(':',1)
+
+    if len(word) < 1 :
+        raise Exception("Unexpected format in genome region string: %s" % (regionStr))
+
+    chrom=word[0]
+    if len(chrom) == 0 :
+        raise Exception("Unexpected format in genome region string: %s" % (regionStr))
+
+    start=None
+    end=None
+
+    if (len(word) > 1) :
+        if len(word[1]) == 0 :
+            raise Exception("Unexpected format in genome region string: %s" % (regionStr))
+
+        rangeWord=word[1].split('-')
+        if len(rangeWord) != 2 :
+            # assume this might be an HLA chrom at this point:
+            chrom=regionStr.strip()
+        else :
+            start = int(rangeWord[0])
+            end = int(rangeWord[1])
+
+            if (end < start) or (start < 1) or (end < 1) :
+                raise Exception("Unexpected format in genome region string: %s" % (regionStr))
+
+    return {"chrom":chrom, "start":start, "end":end}
+
+
+
+def isValidSampleId(sampleId) :
+    return re.match("^[A-Za-z0-9_-]+$", sampleId)
+
+
+
+def getBaiFileNames(bamFile) :
+    "return (picard bai filename,samtools bai filename)"
+    return (bamFile[:-(len(".bam"))]+".bai",bamFile+".bai")
+
+
+
+def javaHeapMemReqest(self,javaMb,javaMinMb=None,overheadMb=None) :
+    """
+    Input is the amount of memory requested for the java heap, output is the
+    amount of java heap memory you're going to actually get, and the total process memory
+    (heap+overhead) to request for the task.
+
+    If javaMinMb is not defined, it is assumed you need the full request
+
+    If overheadMb is not defined, it is set to the global javaTaskHeapOverheadMb value
+
+    return (javaMb,taskMb)
+    """
+    if javaMinMb is None : javaMinMb=javaMb
+    if overheadMb is None : overheadMb=self.params.javaTaskHeapOverheadMb
+
+    javaMb=(self.limitMemMb(javaMb+overheadMb)-overheadMb)
+    if javaMb < javaMinMb :
+        raise Exception("Could not provide minimum java heap memory request for task. Minimum requested: %s Available: %s" % (str(javaMinMb),str(javaMb)))
+    assert (javaMb>0)
+    taskMb=(javaMb+overheadMb)
+    return (javaMb,taskMb)
+
+
+
+def getFastaChromOrderSize(faiFile) :
+    """
+    given a fasta index file,
+    returns
+    (chromOrder,chromSizes)
+    where:
+    chromOrder -- list of chromosomes in fasta order
+    chromSizes -- hash of chromosome sizes
+    """
+    assert os.path.isfile(faiFile)
+
+    chromOrder=[]
+    chromSizes={}
+    for line in open(faiFile) :
+        (chrom,size)=line.strip().split("\t",2)[:2]
+        chromOrder.append(chrom)
+        chromSizes[chrom]=int(size)
+
+    return (chromOrder,chromSizes)
+
+
+
+def getChromIntervals(chromOrder,chromSizes,segmentSize, genomeRegion = None) :
+    """
+    generate chromosome intervals no greater than segmentSize
+
+    chromOrder - iterable object of chromosome names
+    chromSizes - a hash of chrom sizes
+    genomeRegion - optionally restrict chrom intervals to only cover a single specified chromosome region
+
+    return chromIndex,chromLabel,start,end,chromSegment,genomeRegion
+    where start and end are formatted for use with samtools
+    chromSegment is 0-indexed number of segment along each chromosome
+    """
+
+    for (chromIndex, chromLabel) in enumerate(chromOrder) :
+        chromStart=1
+        chromEnd=chromSizes[chromLabel]
+
+        # adjust for the custom genome subsegment case:
+        if genomeRegion is not None :
+            if genomeRegion["chrom"] is not None :
+                if genomeRegion["chrom"] != chromLabel : continue
+                if genomeRegion["start"] is not None :
+                    chromStart=genomeRegion["start"]
+                if genomeRegion["end"] is not None :
+                    chromEnd=genomeRegion["end"]
+
+        chromSize=(chromEnd-chromStart+1)
+        chromSegments=1+((chromSize-1)/segmentSize)
+        segmentBaseSize=chromSize/chromSegments
+        nPlusOne=chromSize%chromSegments
+        start=chromStart
+        for i in xrange(chromSegments) :
+            segSize=segmentBaseSize
+            if i<nPlusOne : segSize += 1
+            end=min(start+(segSize-1),chromStart+chromSize)
+            yield (chromIndex,chromLabel,start,end,i,genomeRegion)
+            start=end+1
+
+
+class PathDigger(object) :
+    """
+    Digs into a well-defined directory structure with prefixed
+    folder names to extract all files associated with
+    combinations of directory names.
+
+    This is written primarily to go through the CASAVA 1.8 output
+    structure.
+
+    #casava 1.8 fastq example:
+    fqDigger=PathDigger(['Project_','Sample_'],".fastq.gz")
+    """
+
+    def __init__(self,prefixList,targetExtension=None) :
+        """
+        if no target extension, then list directories at the tip of the prefix list
+        """
+        self.prefixList=prefixList
+        self.targetExtension=targetExtension
+
+
+    def getNextPath(self,basePath,depth=0,ans=tuple()) :
+        """
+        """
+        if depth < len(self.prefixList) :
+            for d in os.listdir(basePath) :
+                nextDir=os.path.join(basePath,d)
+                if not os.path.isdir(nextDir) : continue
+                if not d.startswith(self.prefixList[depth]) : continue
+                value=d[len(self.prefixList[depth]):]
+                for val in self.getNextPath(nextDir,depth+1,ans+tuple([value])) :
+                    yield val
+        else:
+            if self.targetExtension is None :
+                yield ans+tuple([basePath])
+            else :
+                for f in os.listdir(basePath) :
+                    nextPath=os.path.join(basePath,f)
+                    if not os.path.isfile(nextPath) : continue
+                    if not f.endswith(self.targetExtension) : continue
+                    yield ans+tuple([nextPath])
+
+
+
+def cleanId(input_id) :
+    """
+    filter id so that it's safe to use as a pyflow identifier
+    """
+    import re
+    return re.sub(r'([^a-zA-Z0-9_\-])', "_", input_id)
+
+
+
+def getRobustChromId(chromIndex,chromLabel):
+    return "%s_%s" % (str(chromIndex).zfill(3),cleanId(chromLabel))
+
+
+
+class GenomeSegment(object) :
+    """
+    organizes all variables which can change
+    with each genomic segment.
+
+    The genomic segment is defined by:
+
+    1. chromosome
+    2. begin position (1-indexed closed)
+    3. end position (1-indexed closed)
+    4. chromosome segment (ie. bin) number (0-indexed)
+    """
+
+    def __init__(self,chromIndex,chromLabel,beginPos,endPos,binId,genomeRegion) :
+        """
+        arguments are the 4 genomic interval descriptors detailed in class documentation, plus chromIndex and genomeRegion (which may be None)
+        """
+        self.chromLabel = chromLabel
+        self.beginPos = beginPos
+        self.endPos = endPos
+        self.bamRegion = chromLabel + ':' + str(beginPos) + '-' + str(endPos)
+        self.binId = binId
+        self.binStr = str(binId).zfill(4)
+
+        regionId=getRobustChromId(chromIndex,chromLabel)
+        if genomeRegion is not None :
+            if genomeRegion['start'] is not None :
+                regionId += "-"+str(genomeRegion['start'])
+                if genomeRegion['end'] is not None :
+                    regionId += "-"+str(genomeRegion['end'])
+        self.id = "chromId_%s_%s" % (regionId, self.binStr)
+
+    def size(self) :
+        return (self.endPos-self.beginPos)+1
+
+
+def getNextGenomeSegment(params) :
+    """
+    generator which iterates through all genomic segments and
+    yields a GenomeSegment object for each one.
+    """
+    MEGABASE = 1000000
+    scanSize = params.scanSizeMb * MEGABASE
+
+    if params.genomeRegionList is None :
+        for segval in getChromIntervals(params.chromOrder,params.chromSizes, scanSize) :
+            yield GenomeSegment(*segval)
+    else :
+        for genomeRegion in params.genomeRegionList :
+            for segval in getChromIntervals(params.chromOrder,params.chromSizes, scanSize, genomeRegion) :
+                yield GenomeSegment(*segval)
+
+
+
+def cleanPyEnv() :
+    """
+    clear out some potentially destabilizing env variables:
+    """
+    clearList = [ "PYTHONPATH", "PYTHONHOME"]
+    for key in clearList :
+        if key in os.environ :
+            del os.environ[key]
+
+    os.environ["LC_ALL"] = "C"
+
+
+
+def isLocalSmtp() :
+    """
+    return true if a local smtp server is available
+    """
+    import smtplib
+    try :
+        smtplib.SMTP('localhost')
+    except :
+        return False
+    return True
+
+
+def _isWindows() :
+    import platform
+    return (platform.system().find("Windows") > -1)
+
+
+class Constants :
+    isWindows=_isWindows()
+
+
+def isWindows() :
+    return Constants.isWindows
+
+
+def exeFile(filename):
+    """
+    adjust filename suffix by platform
+    """
+    if isWindows() : return filename + ".exe"
+    return filename
diff --git a/src/python/libexec/CMakeLists.txt b/src/python/libexec/CMakeLists.txt
new file mode 100644
index 0000000..316e79e
--- /dev/null
+++ b/src/python/libexec/CMakeLists.txt
@@ -0,0 +1,31 @@
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+set(INSTALL_TO_DIR "${THIS_LIBEXECDIR}")
+
+file(RELATIVE_PATH THIS_RELATIVE_PYTHON_LIBDIR "${INSTALL_TO_DIR}" "${THIS_PYTHON_LIBDIR}")
+file(RELATIVE_PATH THIS_RELATIVE_LIBEXECDIR "${INSTALL_TO_DIR}" "${THIS_LIBEXECDIR}")
+
+include("${THIS_MACROS_CMAKE}")
+configure_files("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" "*.py")
+
+include("${THIS_GLOBALS_CMAKE}") # get THIS_EXECUTABLE_PERMISSIONS
+install_fileglob("${CMAKE_CURRENT_BINARY_DIR}" "${INSTALL_TO_DIR}" "*.py"
+                 "${THIS_EXECUTABLE_PERMISSIONS}")
+
diff --git a/src/python/libexec/cat.py b/src/python/libexec/cat.py
new file mode 100755
index 0000000..94278ec
--- /dev/null
+++ b/src/python/libexec/cat.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+python version of cat to work around portability/quoting and shell line limits
+"""
+
+import os, sys
+
+
+def ensureDir(d):
+    """
+    make directory if it doesn't already exist, raise exception if
+    something else is in the way:
+    """
+    if os.path.exists(d):
+        if not os.path.isdir(d) :
+            raise Exception("Can't create directory: %s" % (d))
+    else :
+        os.makedirs(d)
+
+
+def getOptions() :
+
+    from optparse import OptionParser
+
+    usage = "usage: %prog -o output [input [input...]]"
+    parser = OptionParser(usage=usage,description="Concatenate input files to output")
+
+    parser.add_option("-o","--output", dest="outFile",default=False,
+                      help="output filename (required)")
+
+    (options,args) = parser.parse_args()
+
+    if len(args) == 0 :
+        parser.print_help()
+        sys.exit(2)
+
+    # validate input:
+    for arg in args :
+        if not os.path.isfile(arg) :
+            raise Exception("Can't find input file: " +arg)
+
+    if options.outFile is None :
+        parser.print_help()
+        sys.exit(2)
+
+    ensureDir(os.path.dirname(os.path.abspath(options.outFile)))
+
+    return (options,args)
+
+
+
+def main() :
+
+    (options,args) = getOptions()
+
+    ofp = open(options.outFile,"w")
+    for arg in args :
+        for line in open(arg) :
+            ofp.write(line)
+
+
+main()
diff --git a/src/python/libexec/denovo_scoring.py b/src/python/libexec/denovo_scoring.py
new file mode 100755
index 0000000..df386ea
--- /dev/null
+++ b/src/python/libexec/denovo_scoring.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+
+import sys
+from os import path
+from os.path import exists, abspath, dirname, basename, splitext, join
+
+
+def check_genotype(probandGT, fatherGT, motherGT):
+    isConsistent = False
+
+    fatherGTItems = fatherGT.split('/')
+    motherGTItems = motherGT.split('/')
+    for it1 in fatherGTItems:
+        for it2 in motherGTItems:
+            temp = [it1, it2]
+            temp.sort()
+            GT = temp[0]+'/'+temp[1]
+            if GT == probandGT:
+                isConsistent = True
+                break
+
+    return isConsistent
+
+
+def add_dq(tokens, probandIx, dq):
+    for ix in xrange(9, len(tokens)):
+        if (ix == probandIx):
+            tokens[ix] += ":%s" % dq
+        else:
+            tokens[ix] += ":."
+
+
+def process_vcf(vcfFile, probandID,
+                fatherID, motherID):
+
+    vcfFile = abspath(vcfFile)
+    dataDir = dirname(vcfFile)
+    filePrefix = splitext(basename(vcfFile))[0]
+    outFile = join(dataDir, filePrefix+".de_novo.vcf")
+    statsFile = join(dataDir, filePrefix+".de_novo.stats.txt")
+
+    fpOut = open(outFile, 'wb')
+    fpStats = open(statsFile, 'wb')
+
+    countPassed = 0
+    countFiltered = 0
+    consistencyDict = {}
+
+    # parser
+    isFormatAdded = False
+    isIxFound = False
+    colNameLine = ""
+    probandIx = -1
+    fatherIx = -1
+    motherIx = -1
+
+    fpVcf = open(vcfFile, 'rb')
+    for line in fpVcf:
+        if line[0] == '#':
+            if not(isFormatAdded) and (line[:8] == "##FORMAT"):
+                fpOut.write("##FORMAT=<ID=DQ,Number=1,Type=Integer,Description=\"De novo quality score\">\n")
+                isFormatAdded = True
+            fpOut.write(line)
+            colNameLine = line
+            continue
+        elif not(isIxFound):
+            # parse the column-name header line to get the columns of proband & parents
+            tokens = colNameLine.split()
+            for ix in xrange(len(tokens)):
+                if tokens[ix] == probandID:
+                    probandIx = ix
+                elif tokens[ix] == fatherID:
+                    fatherIx = ix
+                elif tokens[ix] == motherID:
+                    motherIx = ix
+
+            wrongID = ""
+            if probandIx == -1:
+                wrongID = probandID
+            if fatherIx == -1:
+                wrongID += (',%s' % fatherID)
+            if motherIx == -1:
+                wrongID += (',%s' % motherID)
+
+            if wrongID:
+                errMsg = ('The sample ID %s does not exist in the vcf.'
+                          % wrongID)
+                sys.stderr.write(errMsg + '\nProgram exits.')
+                sys.exit(1)
+
+        tokens = line.split()
+        format = tokens[8]
+
+        items = format.split(':')
+        GTix = -1
+        for ix in xrange(len(items)):
+            if items[ix] == "GT":
+                GTix = ix
+
+        items = tokens[probandIx].split(':')
+        probandGT = items[GTix]
+
+        items = tokens[fatherIx].split(':')
+        fatherGT = items[GTix]
+
+        items = tokens[motherIx].split(':')
+        motherGT = items[GTix]
+
+        # add DQ to the format string
+        format += ":DQ"
+        isConsistent = check_genotype(probandGT, fatherGT, motherGT)
+        if not(isConsistent):
+            # DQ set to 60
+            add_dq(tokens, probandIx, "60")
+
+            # stats
+            filter = tokens[6]
+            if filter.upper() == "PASS":
+                countPassed += 1
+            else:
+                countFiltered += 1
+
+            GTstring = probandGT + '-' + fatherGT + '-' + motherGT
+            if not(GTstring in consistencyDict):
+                consistencyDict[GTstring] = 0
+            consistencyDict[GTstring] += 1
+        else:
+            # DQ set to 0
+            add_dq(tokens, probandIx, "0")
+
+        newLine = ""
+        for i in xrange(8):
+            newLine += tokens[i] + "\t"
+        newLine += format
+        for i in xrange(9, len(tokens)):
+            newLine += "\t" + tokens[i]
+        fpOut.write(newLine+"\n")
+
+    fpVcf.close()
+    fpOut.close()
+
+    fpStats.write("# of passed SVs: %s\n" % (countPassed))
+    fpStats.write("# of filtered SVs: %s\n" % (countFiltered))
+    fpStats.write("probandGT-fatherGT-motherGT\tcounts\n")
+    genotypes = consistencyDict.keys()
+    genotypes.sort()
+    for gt in genotypes:
+        fpStats.write("%s\t%s\n" % (gt, consistencyDict[gt]))
+    fpStats.close()
+
+
+if __name__=='__main__':
+
+    usage = "denovo_scoring.py <vcf file> <proband sample ID> <father sample ID> <mother sample ID>\n"
+    if len(sys.argv) <= 4:
+        sys.stderr.write(usage)
+        sys.exit(1)
+
+    vcfFile = sys.argv[1]
+    probandID = sys.argv[2]
+    fatherID = sys.argv[3]
+    motherID = sys.argv[4]
+
+    if not(exists(vcfFile)):
+        errMsg = ('The file %s does not exist.'
+                  % vcfFile)
+        sys.stderr.write(errMsg + '\nProgram exits.')
+        sys.exit(1)
+
+    process_vcf(vcfFile, probandID,
+                fatherID, motherID)
diff --git a/src/python/libexec/extractSmallIndelCandidates.py b/src/python/libexec/extractSmallIndelCandidates.py
new file mode 100755
index 0000000..ceb12c2
--- /dev/null
+++ b/src/python/libexec/extractSmallIndelCandidates.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+take a subset of the manta candidate vcf which can be fed into a small variant caller
+
+select for (1) simple insert/delete combinations and (2) length <= X
+"""
+
+import os, sys
+import re
+
+
+
+def getKeyVal(string,key) :
+    match=re.search("%s=([^;\t]*);?" % (key) ,string)
+    if match is None : return None
+    return match.group(1);
+
+
+class VCFID :
+    CHROM = 0
+    POS = 1
+    REF = 3
+    ALT = 4
+    QUAL = 5
+    FILTER = 6
+    INFO = 7
+
+
+
+class VcfRecord :
+    """
+    simple vcf record parser
+    """
+
+    def __init__(self, line) :
+        self.line = line
+        w=line.strip().split('\t')
+        self.chrom=w[VCFID.CHROM]
+        self.pos=int(w[VCFID.POS])
+        self.ref=w[VCFID.REF]
+        self.alt=w[VCFID.ALT]
+
+
+
+def getOptions() :
+
+    from optparse import OptionParser
+
+    usage = "usage: %prog [options] < candidate.vcf > smallIndel.vcf"
+    parser = OptionParser(usage=usage)
+
+    parser.add_option("--maxSize", dest="maxSize", type="int",
+                      help="maximum indel size, no default (required)")
+
+    (opt,args) = parser.parse_args()
+
+    if (opt.maxSize is None) or (len(args) != 0) :
+        parser.print_help()
+        sys.exit(2)
+
+    # validate input:
+    if opt.maxSize < 1:
+        raise Exception("Invalid maxSize value: %i" % (opt.maxSize))
+
+    return (opt,args)
+
+
+
+def main() :
+
+    infp = sys.stdin
+    outfp = sys.stdout
+
+    (options,args) = getOptions()
+
+    for line in infp :
+        if line[0] == '#' :
+            outfp.write(line)
+            continue
+
+        rec = VcfRecord(line)
+
+        # remove symbolic alleles:
+        if rec.alt.find("<") != -1 : continue
+
+        # remove translocations
+        if rec.alt.find("[") != -1 : continue
+        if rec.alt.find("]") != -1 : continue
+        if rec.alt.find(":") != -1 : continue
+
+        # we assume there are no multiple alts in the candidate records
+        assert( rec.alt.find(",") == -1 )
+
+        if len(rec.ref) > (options.maxSize+1) : continue
+        if len(rec.alt) > (options.maxSize+1) : continue
+
+        outfp.write(line)
+
+
+
+main()
+
diff --git a/src/python/libexec/filterBam.py b/src/python/libexec/filterBam.py
new file mode 100755
index 0000000..f9294d5
--- /dev/null
+++ b/src/python/libexec/filterBam.py
@@ -0,0 +1,111 @@
+#! /usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Filter the input sam file,
+Only keep evidence reads supporting SVs in the candidate vcf.
+"""
+
+import sys
+import re
+import gzip
+from os.path import isfile
+from optparse import OptionParser
+from glob import glob
+from subprocess import call
+
+
+def getOptions():
+    """
+    parse the command-line; exactly five positional arguments are required,
+    otherwise the usage message is printed and the script exits with status 2
+
+    returns the (options,args) pair produced by OptionParser
+    """
+    parser = OptionParser(usage="usage: %prog [options] samtools_bin candidate_vcf input_sam filtered_sam filtered_bam")
+    parsed = parser.parse_args()
+    if len(parsed[1]) == 5 :
+        return parsed
+
+    parser.print_help()
+    sys.exit(2)
+
+
+def collect_SVs(candidateVcf, svSet):
+    """
+    add the ID (third whitespace-delimited column) of every record in a
+    gzipped candidate vcf to svSet
+
+    candidateVcf -- path of the gzip-compressed candidate vcf
+    svSet -- set which is updated in place; nothing is returned
+    """
+
+    fpVcf = gzip.open(candidateVcf, 'rb')
+    # try/finally so that the handle is released even if a malformed record raises
+    try:
+        for line in fpVcf:
+            if line[0] == '#':
+                continue
+
+            tokens = line.split()
+            # tolerate blank lines instead of failing on the ID lookup
+            if not tokens:
+                continue
+            svSet.add(tokens[2])
+    finally:
+        fpVcf.close()
+
+
+def filter_sam(svSet, inputSam, filteredSam):
+    """
+    copy inputSam to filteredSam, keeping header ('@') lines and only those
+    reads whose ZM:Z: tag names at least one SV ID contained in svSet
+
+    The ZM:Z: value is read as a comma-separated list of items of the form
+    "svID|..."; items whose svID is not in svSet are dropped from the tag.
+    The fields of every retained read are written tab-delimited.
+    """
+    tagPrefix = "ZM:Z:"
+
+    fpIn = open(inputSam, 'rb')
+    try:
+        fpOut = open(filteredSam, 'wb')
+        try:
+            for line in fpIn:
+                if line[0] == '@':
+                    fpOut.write(line)
+                    continue
+
+                isSkip = True
+                tokens = line[:-1].split()
+                for (ix,token) in enumerate(tokens):
+                    if token[:5] != tagPrefix: continue
+
+                    # filter out SVs not in the candidate vcf
+                    keptItems = [sv for sv in token[5:].split(',')
+                                 if sv.split("|")[0] in svSet]
+                    if keptItems:
+                        isSkip = False
+                    # joining (rather than trimming a trailing comma) leaves the
+                    # tag prefix intact even when no item is kept
+                    tokens[ix] = tagPrefix + ",".join(keptItems)
+
+                # skip the read if none of its supported SVs
+                # is included in the candidate vcf
+                if not isSkip:
+                    fpOut.write("\t".join(tokens) + "\n")
+        finally:
+            fpOut.close()
+    finally:
+        fpIn.close()
+
+
+
+if __name__=='__main__':
+
+    # positional arguments, in the order given by the usage string
+    (options,args) = getOptions()
+    (samtoolsBin, candidateVcf, inputSam, filteredSam, filteredBam) = args
+
+    # IDs of all SVs present in the candidate vcf
+    svSet = set()
+    collect_SVs(candidateVcf, svSet)
+
+    # write the reduced sam, then convert filtered sam to bam
+    filter_sam(svSet, inputSam, filteredSam)
+    samToBamCmd = [ samtoolsBin, "view", "-h", "-b", "-o", filteredBam, filteredSam ]
+    call(samToBamCmd)
diff --git a/src/python/libexec/mergeBam.py b/src/python/libexec/mergeBam.py
new file mode 100755
index 0000000..a98e47a
--- /dev/null
+++ b/src/python/libexec/mergeBam.py
@@ -0,0 +1,74 @@
+#! /usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Merge bams listed in a file
+"""
+
+import sys
+import re
+from os.path import isfile
+from optparse import OptionParser
+from glob import glob
+from subprocess import call
+from shutil import copyfile
+
+def getOptions():
+    """
+    parse the command-line and return the (options,args) pair from OptionParser
+
+    Unless exactly five positional arguments are given, the usage message
+    is printed and the script exits with status 2.
+    """
+    parser = OptionParser(usage="usage: %prog [options] samtools_bin bam_mask merged_bam merged_sam bam_list_file")
+    (options,args) = parser.parse_args()
+
+    isArgCountOk = (len(args) == 5)
+    if not isArgCountOk :
+        parser.print_help()
+        sys.exit(2)
+
+    return (options,args)
+
+
+
+if __name__=='__main__':
+
+    # positional arguments, in the order given by the usage string
+    (options,args) = getOptions()
+    (samtoolsBin, bamMask, mergedBam, mergedSam, bamListFile) = args
+
+    # record every bam matching the mask in the list file, one per line
+    bamFiles = []
+    fpList = open(bamListFile, 'wb')
+    for bam in glob(bamMask):
+        fpList.write(bam + "\n")
+        bamFiles.append(bam)
+    fpList.close()
+
+    fileCount = len(bamFiles)
+    if fileCount == 1:
+        # a single input needs no merge, it is copied instead
+        copyfile(bamFiles[0], mergedBam)
+    elif fileCount > 1:
+        call([ samtoolsBin, "merge", "-b",
+               bamListFile, mergedBam ])
+
+    # the bam to sam conversion is run whatever the number of matched files
+    call([ samtoolsBin, "view", "-h",
+           "-o", mergedSam, mergedBam ])
diff --git a/src/python/libexec/mergeChromDepth.py b/src/python/libexec/mergeChromDepth.py
new file mode 100755
index 0000000..41788e8
--- /dev/null
+++ b/src/python/libexec/mergeChromDepth.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Merge chrom depth from multiple samples/BAM files
+"""
+
+import os,sys
+
+scriptDir=os.path.abspath(os.path.dirname(__file__))
+pythonLibDir=os.path.abspath(os.path.join(scriptDir,"@THIS_RELATIVE_PYTHON_LIBDIR@"))
+sys.path.append(pythonLibDir)
+
+from workflowUtil import checkFile
+
+
+
+def getOptions() :
+    """
+    parse the command-line and return the (options,args) pair from OptionParser
+
+    At least one --in and exactly one --out must be given and positional
+    arguments are not accepted, otherwise the usage message is printed and
+    the script exits with status 2. Every --in file is passed to checkFile.
+    """
+
+    from optparse import OptionParser
+
+    parser = OptionParser(usage="usage: %prog [options]")
+
+    parser.add_option("--in", dest="inFiles", action="append", type="string", metavar="FILE",
+                      help="input depth filename, argument may be provided more than once to provide all input")
+    parser.add_option("--out", dest="outFile", type="string", metavar="FILE",
+                      help="output depth filename (required)")
+
+    (options,args) = parser.parse_args()
+
+    isBadUsage = (len(args) != 0) or (options.inFiles is None) or (options.outFile is None)
+    if isBadUsage :
+        parser.print_help()
+        sys.exit(2)
+
+    # validate input:
+    for inFile in options.inFiles :
+        checkFile(inFile,"input depth")
+
+    return (options,args)
+
+
+def main() :
+    """
+    sum the per-chromosome depth values of all --in files and write the
+    totals to the --out file
+
+    Each input line is read as a chromosome label and a depth value
+    separated by a tab. The assertions require every input to list the same
+    set of chromosomes, each of them only once per file.
+    """
+
+    (options,args) = getOptions()
+
+    chrtot = {}
+    for (index,inFile) in enumerate(options.inFiles) :
+        # not named 'chr', which would shadow the builtin
+        chromDepth = {}
+        ifp = open(inFile)
+        try :
+            for line in ifp :
+                w = line.strip().split('\t')
+                assert(w[0] not in chromDepth)
+                chromDepth[w[0]] = float(w[1])
+        finally :
+            ifp.close()
+
+        if (index!=0) :
+            assert(len(chrtot) == len(chromDepth))
+
+        for k in chromDepth :
+            if (index!=0) :
+                assert(k in chrtot)
+                chrtot[k] += chromDepth[k]
+            else :
+                chrtot[k] = chromDepth[k]
+
+    ofp = open(options.outFile,"w")
+    try :
+        for k in chrtot :
+            ofp.write("%s\t%.3f\n" % (k,chrtot[k]))
+    finally :
+        # closed explicitly so that the totals are flushed before main returns
+        ofp.close()
+
+
+main()
diff --git a/src/python/libexec/ploidyFilter.py b/src/python/libexec/ploidyFilter.py
new file mode 100755
index 0000000..9a59fe9
--- /dev/null
+++ b/src/python/libexec/ploidyFilter.py
@@ -0,0 +1,261 @@
+#! /usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+filter vcf to remove overlapping diploid calls which can't be resolved to two haplotypes
+"""
+
+import sys
+import re
+from os.path import exists, isfile
+from optparse import OptionParser
+
+def getKeyVal(string,key) :
+    """
+    return the value of the 'key=value' entry in a ';' or tab delimited
+    string, or None if there is no such entry
+
+    The key has to start an entry, so that e.g. "END" is not found inside
+    "CIEND=...". If the key occurs more than once the first value is returned.
+    """
+    match=re.search("(?:^|[;\t])%s=([^;\t]*)" % (re.escape(key)) ,string)
+    if match is None : return None
+    return match.group(1)
+
+
+# zero-based column indices of the fields of a tab-delimited vcf record
+VCF_CHROM = 0
+VCF_POS = 1
+VCF_REF = 3
+VCF_ALT = 4
+VCF_QUAL = 5
+VCF_FILTER = 6
+VCF_INFO = 7
+VCF_FORMAT = 8
+# index of the first sample column; every column from here on is read as a sample
+VCF_SAMPLE = 9
+
+class VcfRecord :
+    """
+    the parts of a vcf record used by the ploidy filter
+
+    chrom, pos -- location of the record
+    isPass -- True if the FILTER field is exactly "PASS"
+    end -- INFO/END if present, otherwise pos+len(REF)-1
+    svLen -- INFO/SVLEN as int, or None if absent
+    svType -- INFO/SVTYPE, or None if absent
+    gtType -- for each sample, the sum of the first two '/'-separated GT values
+    """
+    def __init__(self, line) :
+        fields = line.strip().split('\t')
+        info = fields[VCF_INFO]
+
+        self.chrom = fields[VCF_CHROM]
+        self.pos = int(fields[VCF_POS])
+        self.isPass = (fields[VCF_FILTER] == "PASS")
+
+        endVal = getKeyVal(info,"END")
+        if endVal is None :
+            self.end = self.pos+len(fields[VCF_REF])-1
+        else :
+            self.end = int(endVal)
+
+        svLenVal = getKeyVal(info,"SVLEN")
+        if svLenVal is None :
+            self.svLen = None
+        else :
+            self.svLen = int(svLenVal)
+
+        self.svType = getKeyVal(info,"SVTYPE")
+
+        # position of the GT entry within each ':'-delimited sample column
+        gtIx = fields[VCF_FORMAT].split(':').index("GT")
+
+        def gtAlleleSum(sample) :
+            alleles = sample.split(':')[gtIx].split('/')
+            return int(alleles[0]) + int(alleles[1])
+
+        self.gtType = [gtAlleleSum(sample) for sample in fields[VCF_SAMPLE:]]
+
+
+def getOptions():
+    """
+    parse the command-line and return the (options,args) pair from OptionParser
+
+    Exactly one positional argument, the input vcf, is required; otherwise
+    the usage message is printed and the script exits with status 2. An
+    Exception is raised if the argument does not name an existing file.
+    """
+    parser = OptionParser(usage="usage: %prog [options] vcf > filtered_vcf")
+    (options,args) = parser.parse_args()
+
+    if len(args) != 1 :
+        parser.print_help()
+        sys.exit(2)
+
+    # validate input:
+    vcfFile = args[0]
+    if isfile(vcfFile) :
+        return (options,args)
+
+    raise Exception("Can't find input vcf file: " + vcfFile)
+
+
+def process_block(recordBlock, nextPos, filteredSites):
+    """
+    consume records from the front of recordBlock, marking groups of
+    overlapping records whose summed genotypes exceed 2 in any sample
+
+    recordBlock -- list of records (chrom, pos, end, svLen, svType and gtType
+                   attributes are read); processed records are removed in place
+    nextPos -- processing stops at the first target whose end is larger than this
+    filteredSites -- dict of chrom -> {(pos, end): True}, updated in place
+
+    A record lacking SVLEN is given length -1, which can never satisfy the
+    size comparison below, so such a record is never grouped with another one.
+    """
+
+    # sys.stderr.write("processing a block with %s sites...\n" % len(recordBlock))
+
+    while (len(recordBlock) > 0):
+        # the first record of the block is the target the others are compared to
+        target = recordBlock[0]
+        targetEnd = target.end
+        # when a new target's end is larger than
+        # the pos of the next site to be read,
+        # we need to read in more sites
+        if targetEnd > nextPos:
+            break
+
+        targetLen = -1
+        if target.svLen is not None:
+            targetLen = abs(target.svLen)
+        targetType = target.svType
+
+        # per-sample genotype sum, starting from the target's own values
+        ploidySum = []
+        for gtPloidy in target.gtType :
+            ploidySum.append(gtPloidy)
+        # indices into recordBlock of the target and the records grouped with it
+        overlapIds = [0]
+
+        for ix in xrange(1, len(recordBlock)):
+            record = recordBlock[ix]
+            pos = record.pos
+            svLen = -1
+            if record.svLen is not None:
+                svLen = abs(record.svLen)
+            svType = record.svType
+
+            # collecting stacked sites with similar size: more than half and
+            # less than twice the target's length
+            # (the test for equal type is currently commented out)
+            if pos < targetEnd:
+                if (
+                   # (svType == targetType) and
+                    (svLen < 2*targetLen) and
+                    (svLen > 0.5*targetLen)):
+                    for (sampleIndex, gtPloidy) in enumerate(record.gtType) :
+                        ploidySum[sampleIndex] += gtPloidy
+                    overlapIds.append(ix)
+            else:
+                # the scan ends at the first record starting at or after the target's end
+                break
+
+        # pop from the highest index down so the remaining indices stay valid
+        overlapIds.reverse()
+        isAnomPloidy = False
+        for psum in ploidySum :
+            if psum > 2 :
+                isAnomPloidy = True
+        if isAnomPloidy:
+            # sites to be filtered due to ploidy
+            for i in overlapIds:
+                site = recordBlock.pop(i)
+                chrm = site.chrom
+                pos = site.pos
+                end = site.end
+
+                if not(chrm in filteredSites):
+                    filteredSites[chrm] = {}
+                filteredSites[chrm][(pos, end)] = True
+        else:
+            # sites to be kept
+            for i in overlapIds:
+                recordBlock.pop(i)
+
+
+def find_stacked_variants(vcfFile):
+    """
+    scan vcfFile and return the sites to be marked by the ploidy filter
+
+    Only records with a PASS filter and an SVTYPE of DEL or DUP are
+    considered. They are accumulated into blocks which are handed to
+    process_block. A summary is written to stderr.
+
+    returns a dict of chrom -> {(pos, end): True}
+
+    NOTE(review): the block logic appears to rely on the records being
+    ordered by chromosome and position -- confirm against the caller
+    """
+    filteredSites = {}
+    recordBlock = []
+    # largest end seen among the records appended since the last chromosome change
+    maxEnd = -1
+    # number of non-header records read, including those ignored below
+    count = 0
+
+    for line in open(vcfFile):
+        if line[0] == "#": continue
+        record = VcfRecord(line)
+
+        chrm = record.chrom
+        pos = record.pos
+        svType = record.svType
+        count += 1
+
+        # ignore filtered records
+        isPassed = record.isPass
+        if not(isPassed):
+            continue
+
+        # consider DEL & DUP only
+        if (svType != "DEL") and (svType != "DUP"): continue
+        end = record.end
+
+        # set up the first target site
+        if (len(recordBlock) == 0):
+            targetChrm = chrm
+            targetEnd = end
+        else:
+            targetChrm = recordBlock[0].chrom
+            targetEnd = recordBlock[0].end
+
+        # keep reading into the block until exceeding the target's end
+        if (chrm == targetChrm) and (pos < targetEnd):
+            recordBlock.append(record)
+            maxEnd = max(maxEnd, end)
+        else:
+            nextPos = pos
+            if (chrm != targetChrm):
+                # on a chromosome change nextPos is set past every end seen so
+                # far, so that no target is held back by process_block
+                nextPos = maxEnd + 1
+                maxEnd = -1
+
+            # process the block until pos < the new target's end
+            process_block(recordBlock, nextPos, filteredSites)
+
+            recordBlock.append(record)
+            maxEnd = max(maxEnd, end)
+
+    # process the last block
+    process_block(recordBlock, maxEnd+1, filteredSites)
+
+    sys.stderr.write("Processed %s sites in the vcf.\n" % count)
+    numFiltered = 0
+    for c in filteredSites:
+        numFiltered += len(filteredSites[c])
+    sys.stderr.write("Filtered %s sites due to ploidy.\n" % numFiltered)
+    sys.stderr.write("Filtered sites: %s\n" % filteredSites)
+
+    return filteredSites
+
+
+def check_filtered_sites(site, filteredSites):
+    """
+    return True if the (pos, end) pair of site is recorded under its
+    chromosome in filteredSites (a dict of chrom -> {(pos, end): True}),
+    and False otherwise
+    """
+    siteKey = (site.pos, site.end)
+    if site.chrom not in filteredSites:
+        return False
+    return siteKey in filteredSites[site.chrom]
+
+
+def filter_variants(vcfFile, filteredSites):
+    """
+    write vcfFile to stdout, setting the FILTER field to "Ploidy" for every
+    PASS DEL/DUP record found in filteredSites
+
+    vcfFile -- path of the input vcf
+    filteredSites -- dict of chrom -> {(pos, end): True}
+
+    The matching ##FILTER header line is inserted before the first ##FILTER
+    line of the input or, if the input has none, before the first header
+    line which does not start with "##".
+    """
+
+    isHeaderAdded = False
+    filterHeadline = "##FILTER=<ID=Ploidy,Description=\"For DEL & DUP variants, the genotypes of overlapping variants (with similar size) are inconsistent with diploid expectation\">\n"
+
+    vcfOut = sys.stdout
+
+    vcfIn = open(vcfFile)
+    try:
+        for line in vcfIn:
+            if line[0] == '#':
+                # the second test is a fallback so that the Ploidy filter is
+                # declared even when the input declares no filter of its own
+                if not(isHeaderAdded) and ((line[:8] == "##FILTER") or (line[:2] != "##")):
+                    vcfOut.write(filterHeadline)
+                    isHeaderAdded = True
+            else:
+                site = VcfRecord(line)
+                # only filter on DEL & DUP for now
+                if (site.isPass and
+                    ((site.svType == "DEL") or (site.svType == "DUP")) and
+                    check_filtered_sites(site, filteredSites)):
+                    w = line.strip().split('\t')
+                    # add the "Ploidy" filter
+                    w[VCF_FILTER] = "Ploidy"
+                    line = "\t".join(w)+"\n"
+
+            vcfOut.write(line)
+    finally:
+        vcfIn.close()
+
+
+if __name__=='__main__':
+
+    # the single positional argument is the vcf to be filtered
+    (options,args) = getOptions()
+    vcfFile = args[0]
+
+    # two passes over the same file: find the sites to mark, then rewrite the vcf
+    filter_variants(vcfFile, find_stacked_variants(vcfFile))
diff --git a/src/python/libexec/sortBam.py b/src/python/libexec/sortBam.py
new file mode 100755
index 0000000..0b5956a
--- /dev/null
+++ b/src/python/libexec/sortBam.py
@@ -0,0 +1,54 @@
+#! /usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+Sort the given bam file only if it exists
+"""
+
+import sys
+import re
+from os.path import isfile
+from optparse import OptionParser
+from subprocess import call
+
+def getOptions():
+    """
+    parse the command-line and return the (options,args) pair from OptionParser
+
+    Unless exactly three positional arguments are given, the usage message
+    is printed and the script exits with status 2.
+    """
+    parser = OptionParser(usage="usage: %prog [options] samtools_bin original_bam sorted_bam")
+    parsed = parser.parse_args()
+
+    if len(parsed[1]) == 3 :
+        return parsed
+
+    parser.print_help()
+    sys.exit(2)
+
+
+
+if __name__=='__main__':
+
+    # positional arguments, in the order given by the usage string
+    (options,args) = getOptions()
+    (samtoolsBin, originalBam, sortedBam) = args
+
+    # a missing input bam is not an error, nothing is done in that case
+    if isfile(originalBam):
+        sortCmd = [ samtoolsBin, "sort", originalBam, sortedBam ]
+        call(sortCmd)
diff --git a/src/python/libexec/sortEdgeLogs.py b/src/python/libexec/sortEdgeLogs.py
new file mode 100755
index 0000000..ed7be93
--- /dev/null
+++ b/src/python/libexec/sortEdgeLogs.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+sort manta edge runtime logs
+"""
+
+import os, sys
+
+
+def ensureDir(d):
+    """
+    create directory d, including missing parents, unless it already exists
+
+    raises an Exception if d exists but is not a directory
+    """
+    if not os.path.exists(d):
+        os.makedirs(d)
+        return
+
+    if os.path.isdir(d) : return
+
+    raise Exception("Can't create directory: %s" % (d))
+
+
+
+def listInputLogs(logListFile,args) :
+    """
+    generate all input log filenames: first those given in args, then those
+    listed one per line in logListFile (skipped if logListFile is None)
+
+    Surrounding whitespace is stripped from the listed names and blank lines
+    are ignored. The list file is read completely and closed before its
+    first entry is yielded.
+    """
+    for arg in args :
+        yield arg
+    if logListFile is None : return
+
+    listFp = open(logListFile)
+    try :
+        listedFiles = [line.strip() for line in listFp]
+    finally :
+        listFp.close()
+
+    for logFile in listedFiles :
+        # a blank line would otherwise be passed on as a file named ""
+        if logFile : yield logFile
+
+
+
+def getOptions() :
+    """
+    parse the command-line and return the (options,args) pair from OptionParser
+
+    At least one input log (given directly or through -f) and the -o output
+    filename are required, otherwise the usage message is printed and the
+    script exits with status 2. The output directory is created if needed;
+    an Exception is raised for a missing list file or input log.
+    """
+
+    from optparse import OptionParser
+
+    usage = "usage: %prog [options] -o output_log [input_log [input_log...]]"
+    parser = OptionParser(usage=usage)
+
+    # no explicit default here: an omitted -o has to be detectable below
+    parser.add_option("-o", dest="outFile",
+                      help="sorted output filename (required)")
+    parser.add_option("-f", dest="logListFile",
+                      help="File listing input log files, one file per line. These will be used in addition to any provided directly on the command-line")
+
+    (options,args) = parser.parse_args()
+
+    if len(args) == 0 and not options.logListFile:
+        parser.print_help()
+        sys.exit(2)
+
+    # true for an omitted -o as well as for an empty filename
+    if not options.outFile :
+        parser.print_help()
+        sys.exit(2)
+
+    # validate input:
+    ensureDir(os.path.dirname(os.path.abspath(options.outFile)))
+
+    if options.logListFile is not None :
+        if not os.path.exists(options.logListFile) :
+            raise Exception("Can't find log list file: " + options.logListFile)
+
+    for logFile in listInputLogs(options.logListFile,args) :
+        if not os.path.isfile(logFile) :
+            raise Exception("Can't find input log file: " +logFile)
+
+    return (options,args)
+
+
+
+def main() :
+    """
+    write the lines of all input logs to the output file, ordered by the
+    numeric value of their second tab-delimited column, largest value first
+    """
+
+    (options,args) = getOptions()
+    slog = []
+
+    for logFile in listInputLogs(options.logListFile,args) :
+        ifp = open(logFile)
+        try :
+            for line in ifp :
+                w1=float(line.split('\t',2)[1])
+                slog.append((w1,line))
+        finally :
+            ifp.close()
+
+    # entries with equal numeric value are ordered by their line text
+    slog.sort(reverse=True)
+
+    ofp = open(options.outFile,"w")
+    try :
+        for (w1,line) in slog :
+            ofp.write(line)
+    finally :
+        # closed explicitly so that the output is flushed before main returns
+        ofp.close()
+
+
+main()
diff --git a/src/python/libexec/sortVcf.py b/src/python/libexec/sortVcf.py
new file mode 100755
index 0000000..e9a9799
--- /dev/null
+++ b/src/python/libexec/sortVcf.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+sort input vcf
+"""
+
+import os, sys
+import re
+
+
+def isInfoKey(string,key) :
+    """
+    return True if key occurs as a complete entry of a ';' or tab delimited string
+
+    The entry may be the first or the last one of the string, so no
+    delimiter is required in front of or behind it.
+    """
+    match=re.search("(?:^|[;\t])%s(?:$|[;\t])" % (re.escape(key)) ,string)
+    return match is not None
+
+
+def getKeyVal(string,key) :
+    """
+    return the value of the 'key=value' entry in a ';' or tab delimited
+    string, or None if there is no such entry
+
+    The key has to start an entry, so that e.g. "END" is not found inside
+    "CIEND=...". If the key occurs more than once the first value is returned.
+    """
+    match=re.search("(?:^|[;\t])%s=([^;\t]*)" % (re.escape(key)) ,string)
+    if match is None : return None
+    return match.group(1)
+
+
+# zero-based column indices of the fields of a tab-delimited vcf record
+VCF_CHROM = 0
+VCF_POS = 1
+VCF_REF = 3
+VCF_ALT = 4
+VCF_QUAL = 5
+VCF_FILTER = 6
+VCF_INFO = 7
+
+
+
+class VcfRecord :
+    """
+    vcf record reduced to the fields needed for sorting and, optionally,
+    for the removal of duplicates
+
+    line, chrom, pos and endPos are always set; endPos is INFO/END if
+    present, otherwise pos+len(REF)-1.
+    If isUnique is true, ref, alt, qual (still a string), isPass and
+    invState are set as well; invState is "INV3", "INV5" or None depending
+    on which of the two INFO entries is found.
+    """
+    def __init__(self, line, isUnique) :
+        self.line = line
+        w=line.strip().split('\t')
+        self.chrom=w[VCF_CHROM]
+        self.pos=int(w[VCF_POS])
+        if isUnique :
+            self.ref=w[VCF_REF]
+            self.alt=w[VCF_ALT]
+            self.qual=w[VCF_QUAL]
+            self.isPass=(w[VCF_FILTER] == "PASS")
+            self.invState=None
+            inv3 = isInfoKey(w[VCF_INFO],"INV3")
+            # INV5 is looked up in the same way as INV3, i.e. as an entry
+            # without a value, rather than as a 'key=value' pair
+            inv5 = isInfoKey(w[VCF_INFO],"INV5")
+            assert(not (inv3 and inv5))
+            if inv3: self.invState = "INV3"
+            if inv5: self.invState = "INV5"
+        self.endPos=self.pos+len(w[VCF_REF])-1
+        val = getKeyVal(w[VCF_INFO],"END")
+        if val is not None :
+            self.endPos = int(val)
+
+
+class Constants :
+    """
+    holder for the precompiled pattern used while reading vcf headers
+    """
+
+    import re
+
+    # matches a '##contig=<ID=...' header line and captures the ID value
+    contigpat = re.compile("^##contig=<ID=([^,>]*)[,>]")
+
+
+def processFile(isUnique, vcfFile, isFirst, chromOrder, header, recList) :
+    """
+    read one vcf file into the supplied lists
+
+    Every non-header line is appended to recList as a VcfRecord built with
+    isUnique. Header lines are only used if isFirst is true: they are
+    appended to header, and the ID of each ##contig line is appended to
+    chromOrder.
+    """
+
+    for line in open(vcfFile) :
+        if line[0] != "#" :
+            recList.append(VcfRecord(line, isUnique))
+            continue
+
+        if not isFirst : continue
+
+        header.append(line)
+        contigMatch = Constants.contigpat.match(line)
+        if contigMatch is not None :
+            chromOrder.append(contigMatch.group(1))
+
+
+
+def listInputVcfs(vcfListFile,args) :
+    """
+    generate all input vcf filenames: first those given in args, then those
+    listed one per line in vcfListFile (skipped if vcfListFile is None)
+
+    Surrounding whitespace is stripped from the listed names and blank lines
+    are ignored. The list file is read completely and closed before its
+    first entry is yielded.
+    """
+    for arg in args :
+        yield arg
+    if vcfListFile is None : return
+
+    listFp = open(vcfListFile)
+    try :
+        listedFiles = [line.strip() for line in listFp]
+    finally :
+        listFp.close()
+
+    for vcfFile in listedFiles :
+        # a blank line would otherwise be passed on as a file named ""
+        if vcfFile : yield vcfFile
+
+
+
+def getOptions() :
+    """
+    parse the command-line and return the (options,args) pair from OptionParser
+
+    At least one input vcf has to be given, directly or through -f,
+    otherwise the usage message is printed and the script exits with
+    status 2. An Exception is raised for a missing list file or input vcf.
+    """
+
+    from optparse import OptionParser
+
+    usage = "%prog [options] [input_vcf [input_vcf..]] > output_vcf"
+    parser = OptionParser(usage=usage)
+
+    parser.add_option("-u", action="store_true", dest="isUnique", default=False,
+                      help="filter all but one record with the same {CHR,POS,REF,ALT,(INV3|INV5|)}")
+    parser.add_option("-f", dest="vcfListFile",
+                      help="File listing input vcf files, one file per line. These will be used in addition to any provided directly on the command-line")
+
+    (options,args) = parser.parse_args()
+
+    if not (args or options.vcfListFile) :
+        parser.print_help()
+        sys.exit(2)
+
+    # validate input:
+    listFile = options.vcfListFile
+    if (listFile is not None) and (not os.path.exists(listFile)) :
+        raise Exception("Can't find vcf list file: " + listFile)
+
+    for vcfFile in listInputVcfs(listFile,args) :
+        if os.path.isfile(vcfFile) : continue
+        raise Exception("Can't find input vcf file: " +vcfFile)
+
+    return (options,args)
+
+
+
+def resolveRec(recEqualSet, recList) :
+    """
+    append the preferred record of a set of 'equal' vcf records to recList
+
+    Nothing is appended for an empty set. As a side effect the qual of
+    every record is converted to float (0. if it is not numeric).
+
+    A record replaces the current choice if it is the first one with PASS
+    in the filter field, if it has the same PASS state and a higher
+    quality, or if it is the first one whose alt does not start with '<'.
+    """
+
+    if not recEqualSet: return
+
+    bestRec = recEqualSet[0]
+    (bestQual, bestIsPass, bestIsAssembled) = (0., False, False)
+
+    for rec in recEqualSet :
+        try:
+            qual = float(rec.qual)
+        except ValueError:
+            qual = 0.
+        rec.qual = qual
+
+        assert rec.qual >= 0.
+
+        isNewPass = (rec.isPass and (not bestIsPass))
+        isHighQual = ((rec.isPass == bestIsPass) and (qual > bestQual))
+        isNewAssembled = ((not bestIsAssembled) and (rec.alt[0] != '<'))
+        if not (isNewPass or isHighQual or isNewAssembled) : continue
+
+        bestRec = rec
+        bestQual = qual
+        bestIsPass = rec.isPass
+        bestIsAssembled = (rec.alt[0] != '<')
+
+    recList.append(bestRec)
+
+
+
+def main() :
+    """
+    read all input vcfs, sort their records and write the result to stdout
+
+    The header lines are taken from the first input only. With -u, each
+    run of neighbouring sorted records judged equal by isEqualRec is
+    reduced to the single record selected by resolveRec.
+    """
+
+    outfp = sys.stdout
+
+    (options,args) = getOptions()
+
+    header=[]
+    recList=[]
+    chromOrder=[]
+
+    isFirst=True
+    for vcfFile in listInputVcfs(options.vcfListFile,args) :
+        processFile(options.isUnique, vcfFile, isFirst, chromOrder, header, recList)
+        isFirst = False
+
+    def vcfRecSortKey(x) :
+        """
+        sort vcf records for final output
+
+        Fancy chromosome sort rules:
+        if contig records are found in the vcf header, then sort chroms in that order
+        for any chrom names not found in the header, sort them in lex order after the
+        found chrom names
+        """
+
+        try :
+            headerOrder = chromOrder.index(x.chrom)
+        except ValueError :
+            # one past the last header index, so that chroms missing from
+            # the header sort after all chroms found in it
+            headerOrder = len(chromOrder)
+
+        return (headerOrder, x.chrom, x.pos, x.endPos)
+
+    recList.sort(key = vcfRecSortKey)
+
+    for line in header :
+        outfp.write(line)
+
+    def isEqualRec(rec1,rec2) :
+        """
+        compare two (chrom, pos, ref, alt, endPos, invState) tuples; either may be None
+        """
+        if (rec1 is None) or (rec2 is None) : return False
+
+        if rec1[0] != rec2[0]: return False # chrom
+        if rec1[1] != rec2[1]: return False # pos
+        if rec1[2] != rec2[2]: return False # ref
+        if rec1[4] != rec2[4]: return False # endPos
+        if rec1[5] != rec2[5]: return False # invState
+
+        # special handling to find duplications when alt is the only difference:
+        if rec1[3] != rec2[3]:
+            if rec1[3] != "<INS>" and rec2[3] != "<INS>":
+                return False
+
+            def matchTest(rec) :
+                # the other alt has to be an explicit sequence of at least 80 characters
+                if rec[0] == "<" : return False
+                if len(rec) < 80 : return False
+                return True
+
+            if rec1[3] == "<INS>" :
+                return matchTest(rec2[3])
+            if rec2[3] == "<INS>" :
+                return matchTest(rec1[3])
+
+        return True
+
+
+    if options.isUnique :
+        recList2 = []
+        recEqualSet = []
+        lastRec = None
+        for vcfrec in recList :
+            rec = (vcfrec.chrom, vcfrec.pos, vcfrec.ref, vcfrec.alt, vcfrec.endPos, vcfrec.invState)
+            # each record is compared to its direct predecessor in sort order
+            if not isEqualRec(rec,lastRec) :
+                resolveRec(recEqualSet,recList2)
+                recEqualSet = []
+            recEqualSet.append(vcfrec)
+            lastRec = rec
+        resolveRec(recEqualSet,recList2)
+        recList = recList2
+
+    for vcfrec in recList :
+        outfp.write(vcfrec.line)
+
+
+main()
+
diff --git a/src/python/libexec/vcfCmdlineSwapper.py b/src/python/libexec/vcfCmdlineSwapper.py
new file mode 100644
index 0000000..aa9ef5b
--- /dev/null
+++ b/src/python/libexec/vcfCmdlineSwapper.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+"""
+replace or add vcf header cmdline field
+
+usage $0 "new cmdline" < in > out
+"""
+
+import os, sys
+
+prefix="##cmdline="
+
+
+def main() :
+    """
+    copy a vcf from stdin to stdout, replacing every "##cmdline=" header
+    line by one built from the arguments of this script
+
+    If no new cmdline entry has been written by the time the first line
+    not starting with "##" is read, the entry is inserted directly before
+    that line.
+    """
+    infp = sys.stdin
+    outfp = sys.stdout
+
+    isNewCLWritten = False
+
+    for line in infp :
+        isOldCL = False
+        if line.startswith("##") :
+            isOldCL = line.startswith(prefix)
+            isWriteNewCL = isOldCL
+        else :
+            isWriteNewCL = (not isNewCLWritten)
+
+        if isWriteNewCL :
+            outfp.write(prefix + " ".join(sys.argv[1:]) + "\n")
+            isNewCLWritten = True
+
+        # the previous cmdline entry is dropped, every other line is passed through
+        if not isOldCL :
+            outfp.write(line)
+
+
+main()
diff --git a/src/srcqc/README.txt b/src/srcqc/README.txt
new file mode 100644
index 0000000..03c8868
--- /dev/null
+++ b/src/srcqc/README.txt
@@ -0,0 +1,2 @@
+This directory contains misc code checking/static analysis which is run as part of the
+default build.
diff --git a/src/srcqc/check_for_nonascii_source.bash b/src/srcqc/check_for_nonascii_source.bash
new file mode 100755
index 0000000..31be0c1
--- /dev/null
+++ b/src/srcqc/check_for_nonascii_source.bash
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+# treat expansion of an unset variable as an error:
+set -o nounset
+# a pipeline's exit status is that of the last command in it to fail, not just of its final command:
+set -o pipefail
+
+#
+# print the absolute physical path (symlinks resolved) of directory $1
+#
+# the cd happens in a subshell so the caller's working directory is untouched;
+# if the directory cannot be entered nothing is printed and the status is non-zero
+#
+reltoabs() {
+    (cd "$1" && pwd -P)
+}
+
+# locate the directory holding this script, and its parent directory which is
+# the root of the tree to be searched (expansions are quoted so that a
+# checkout path containing whitespace still works)
+scriptdir=$(reltoabs "$(dirname "$0")")
+srcdir=$(reltoabs "$scriptdir/..")
+
+
+# this script takes no arguments: print the usage text and fail if any are given
+if [ $# != 0 ]; then
+    cat <<EOF
+
+usage: $0
+
+check for non-ascii characters in all source files. If found the filename, line number and a visual highlight within each line will be displayed
+
+EOF
+    exit 2
+fi
+
+
+#
+# list the regular files under $1 which are cmake sources
+#
+find_cmake_source() {
+    local base_dir=$1
+    # the name tests are parenthesized so that '-type f' applies to every
+    # alternative -- find's implicit -and binds more tightly than -or
+    find "$base_dir" -type f \( \
+        -name "*.cmake" -or \
+        -name "CMakeLists.txt" \
+    \)
+}
+
+#
+# list the regular files under $1 which are c/c++ sources or headers
+#
+find_cxx_source() {
+    local base_dir=$1
+    # the name tests are parenthesized so that '-type f' applies to every
+    # alternative -- find's implicit -and binds more tightly than -or
+    find "$base_dir" -type f \( \
+        -name "*.cpp" -or \
+        -name "*.c" -or \
+        -name "*.hh" -or \
+        -name "*.h" \
+    \)
+}
+
+#
+# list the regular files under $1 which are shell/python scripts or are named "configure"
+#
+find_script_source() {
+    local base_dir=$1
+    # the name tests are parenthesized so that '-type f' applies to every
+    # alternative -- find's implicit -and binds more tightly than -or
+    find "$base_dir" -type f \( \
+        -name "*.bash" -or \
+        -name "*.sh" -or \
+        -name "configure" -or \
+        -name "*.py" \
+    \)
+}
+
+#
+# list every cmake, c/c++ and script source file found under each entry of
+# $srcdir, skipping the entry named "submodule"
+#
+get_source() {
+    local f dir
+    for f in "$srcdir"/*; do
+        dir=$(basename "$f")
+        if [ "$dir" = "submodule" ]; then continue; fi
+        find_cmake_source "$f"
+        find_cxx_source "$f"
+        find_script_source "$f"
+    done
+}
+
+# the tab character is spelled out so that the match pattern below does not
+# depend on a literal tab surviving editors and copy/paste
+tab=$'\t'
+
+is_error=false
+# read one filename per line so that paths containing spaces are not split into
+# words; the loop is fed by process substitution (not a pipe) so that it runs in
+# this shell and the is_error assignment is still visible after it
+while IFS= read -r f; do
+    #echo "checking: $f"
+
+    # not portable to OS X:
+    #grep --color='auto' -n -H -P "[\x80-\xFF]" $f
+
+    # match any character other than printable ascii (space through tilde) and tab:
+    #
+    LC_ALL=C grep --color='auto' -n -H "[^ -~${tab}]" "$f"
+    # grep exits with 1 only when it ran cleanly and selected no lines, so both
+    # a match (0) and a grep failure (2 or more) are counted as errors
+    if [ $? != 1 ]; then
+        is_error=true
+    fi
+done < <(get_source)
+
+if $is_error; then
+    echo "ERROR: source contains non-ascii or non-printing characters" 1>&2
+    exit 1
+fi
diff --git a/src/srcqc/run_cppcheck.py b/src/srcqc/run_cppcheck.py
new file mode 100755
index 0000000..09730af
--- /dev/null
+++ b/src/srcqc/run_cppcheck.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+#
+# Manta - Structural Variant and Indel Caller
+# Copyright (c) 2013-2016 Illumina, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+
+# if cppcheck is found, run it on all c++ and return an error for *any* warning message:
+
+
+import os
+import sys
+import subprocess
+
+
+def which(searchFile) :
+    """
+    Search the directories listed in the PATH environment variable for searchFile.
+
+    The result should be similar to that of the *nix 'which' utility: only regular
+    files with execute permission are considered, and the first match in PATH
+    order is the one returned.
+
+    Returns the search directory joined with searchFile for the first match, or
+    None if there is no match or PATH is not set.
+    """
+    pathValue = os.environ.get("PATH")
+    if pathValue is None : return None
+
+    for searchPath in pathValue.split(os.pathsep):
+        test=os.path.join(searchPath,searchFile)
+        if os.path.isfile(test) and os.access(test, os.X_OK) : return test
+
+    return None
+
+
+def check_version(executable, versionRequired):
+    """
+    Test whether the version reported by executable is older than versionRequired.
+
+    executable is run with the single argument "--version", and the second
+    whitespace-delimited word on the first line of its stdout is taken as the
+    version (e.g. "1.69" from "Cppcheck 1.69").
+
+    Versions are compared as dot-separated number fields, most significant first.
+    The shorter of the two is padded with zero fields, so that e.g. "1.69.1" can
+    be compared against "1.69", and only the leading digits of each field are
+    used, so that a suffixed field such as "0rc1" is read as 0.
+
+    Returns True if the reported version is strictly older than versionRequired,
+    and False otherwise -- including when no version can be read from the output.
+    """
+
+    def toNums(versionStr) :
+        """convert a dotted version string to a list of ints"""
+        nums = []
+        for field in versionStr.split('.') :
+            digits = ""
+            for ch in field :
+                if not ch.isdigit() : break
+                digits += ch
+            nums.append(int(digits) if digits else 0)
+        return nums
+
+    proc=subprocess.Popen([executable,"--version"],stdout=subprocess.PIPE)
+
+    # communicate() drains the pipe and also waits for the process to exit
+    output = proc.communicate()[0]
+
+    # the pipe yields bytes rather than str when run under python3
+    if not isinstance(output, str) : output = output.decode("ascii", "replace")
+
+    lines = output.splitlines()
+    if len(lines) == 0 : return False
+    words = lines[0].split()
+    if len(words) < 2 : return False
+
+    versionNums = toNums(words[1])
+    versionReqNums = toNums(versionRequired)
+
+    fieldCount = max(len(versionNums), len(versionReqNums))
+    versionNums += [0] * (fieldCount - len(versionNums))
+    versionReqNums += [0] * (fieldCount - len(versionReqNums))
+
+    # lists are ordered by their first differing element, i.e. by the most
+    # significant differing version field
+    return versionNums < versionReqNums
+
+
+def usage() :
+    """
+    Write the usage message for this script to stderr, then exit with status 2.
+    """
+
+    scriptName=os.path.basename(__file__)
+
+    usageStr="""
+
+    usage: %s cxx_root_directory
+
+    run cppcheck on project c++ source code, return error for any unsuppressed cppcheck issue
+
+""" % (scriptName)
+
+    sys.stderr.write(usageStr)
+    sys.exit(2)
+
+
+
+def main() :
+    """
+    Run cppcheck on the source tree named by the single command-line argument.
+
+    Exits with status 0, without running any check, if cppcheck is not found on
+    the PATH or is older than version 1.69. Otherwise each line cppcheck writes
+    to stderr (other than its missing-include notice) is echoed to stderr and
+    counted, and the process exits with status 1 if any line was counted or if
+    cppcheck itself exits with a non-zero status. On success an empty file named
+    "cppcheck.done" is created in the current working directory.
+    """
+
+    if len(sys.argv) != 2 :
+        usage()
+
+    srcRoot=sys.argv[1]
+
+    cppcheck_path = which("cppcheck")
+    if cppcheck_path is None :
+        sys.exit(0)
+
+    is_old_version = check_version(cppcheck_path, "1.69")
+    if is_old_version :
+        sys.exit(0)
+
+    # need to trace real path out of any symlinks so that cppcheck can find its runtime config info:
+    cppcheck_path = os.path.realpath(cppcheck_path)
+
+    checkCmd=[cppcheck_path,
+              "--enable=all",
+              "--std=c++11",
+              "--force",
+              "--verbose",
+              "--quiet",
+              "--inline-suppr"]
+
+    # manipulate the warning messages so that they look like gcc errors -- this enables IDE parsing of error location:
+    checkCmd.append("--template={file}:{line}:1: error: {severity}:{message}")
+
+    suppressList=["unusedFunction", "unmatchedSuppression", "missingInclude", "purgedConfiguration"]
+    for stype in suppressList :
+        checkCmd.append("--suppress="+stype)
+
+    # xml output is useful for getting a warning's id field, which is what you need to suppress it:
+    #checkCmd.append("--xml")
+
+    # this is more aggressive and includes more FPs
+    #checkCmd.append("--inconclusive")
+
+    checkCmd.append(srcRoot)
+
+    # universal_newlines makes stderr a text stream, so that lines are str (not
+    # bytes) under python3 as well as python2
+    proc = subprocess.Popen(checkCmd, stderr=subprocess.PIPE, universal_newlines=True)
+
+    includeError="Cppcheck cannot find all the include files"
+
+    errCount=0
+    for line in proc.stderr :
+
+        # in cppcheck 1.59 missingInclude suppression appears to be broken -- this is a workaround:
+        if includeError in line : continue
+
+        sys.stderr.write(line)
+        errCount += 1
+
+    # stderr has been read to EOF and stdout is not piped, so this wait cannot
+    # block on a full pipe; it reaps the child and provides its exit status
+    retCode = proc.wait()
+    if retCode != 0 :
+        sys.stderr.write("cppcheck exited with status %i\n" % (retCode))
+
+    if errCount != 0 or retCode != 0 : sys.exit(1)
+
+    open("cppcheck.done", 'w').close()
+
+
+
+# run the check only when this file is executed as a script, not when it is imported:
+if __name__ == '__main__' :
+    main()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/manta.git



More information about the debian-med-commit mailing list