[med-svn] [Git][med-team/tvc][master] 12 commits: Add watch file using Git mode, Remove code copy of libsmithwaterman

Thu Sep 27 09:15:22 BST 2018

Andreas Tille pushed to branch master at Debian Med / tvc


Commits:
ca31db55 by Andreas Tille at 2018-09-27T07:37:23Z
Add watch file using Git mode, Remove code copy of libsmithwaterman

- - - - -
70e795ac by Andreas Tille at 2018-09-27T07:38:38Z
Add full file exclusion list used in former packaging

- - - - -
21d5d20b by Andreas Tille at 2018-09-27T07:38:38Z
New upstream version 5.0.3+git20151221.80e144e+dfsg
- - - - -
6333ae25 by Andreas Tille at 2018-09-27T07:54:04Z
Use Debian packaged libsmithwaterman

- - - - -
1fba086f by Andreas Tille at 2018-09-27T07:57:59Z
hardening=+all

- - - - -
efe8db80 by Andreas Tille at 2018-09-27T07:59:41Z
Update copyright

- - - - -
68ad33f0 by Andreas Tille at 2018-09-27T08:00:24Z
debhelper 11

- - - - -
4dd922af by Andreas Tille at 2018-09-27T08:00:27Z
Point Vcs fields to salsa.debian.org

- - - - -
d2200ca9 by Andreas Tille at 2018-09-27T08:00:27Z
Standards-Version: 4.2.1

- - - - -
e11696ca by Andreas Tille at 2018-09-27T08:06:37Z
Fix duplicated license

- - - - -
931f804b by Andreas Tille at 2018-09-27T08:08:58Z
Ignore script-with-language-extension as per consensus on Debian Med list

- - - - -
f5165aac by Andreas Tille at 2018-09-27T08:12:15Z
Upload to unstable

- - - - -


26 changed files:

- + README.md
- + README.txt
- debian/changelog
- debian/compat
- debian/control
- debian/copyright
- + debian/lintian-overrides
- debian/patches/series
- + debian/patches/use_debian_packaged_smithwaterman.patch
- debian/rules
- + debian/watch
- − external/vcflib/smithwaterman/BandedSmithWaterman.cpp
- − external/vcflib/smithwaterman/BandedSmithWaterman.h
- − external/vcflib/smithwaterman/IndelAllele.cpp
- − external/vcflib/smithwaterman/IndelAllele.h
- − external/vcflib/smithwaterman/LeftAlign.cpp
- − external/vcflib/smithwaterman/LeftAlign.h
- − external/vcflib/smithwaterman/Makefile
- − external/vcflib/smithwaterman/Mosaik.h
- − external/vcflib/smithwaterman/Repeats.cpp
- − external/vcflib/smithwaterman/Repeats.h
- − external/vcflib/smithwaterman/SWMain.cpp
- − external/vcflib/smithwaterman/SmithWatermanGotoh.cpp
- − external/vcflib/smithwaterman/SmithWatermanGotoh.h
- − external/vcflib/smithwaterman/convert.h
- − external/vcflib/smithwaterman/smithwaterman.cpp


Changes:

=====================================
README.md
=====================================
@@ -0,0 +1,165 @@
+# Ion Torrent Variant Caller
+
+Ion Torrent Variant Caller (TVC) is a genetic variant caller for Ion Torrent sequencing platforms,
+and is specially optimized to exploit the underlying flow signal information in the statistical model
+to evaluate variants. Torrent Variant Caller is designed to call single-nucleotide polymorphisms (SNPs),
+multi-nucleotide polymorphisms (MNPs), insertions, deletions, and block substitutions.
+
+### Quick Start
+```
+git clone https://github.com/domibel/IonTorrent-VariantCaller.git
+
+BASE=`pwd`
+TVC_SOURCE_DIR=$BASE/IonTorrent-VariantCaller
+
+*** adjust build and install directory ***
+TVC_BUILD_DIR=$BASE/IonTorrent-VariantCaller-build
+TVC_INSTALL_DIR=$BASE/IonTorrent-VariantCaller-install/usr/local
+
+mkdir -p $TVC_BUILD_DIR
+mkdir -p $TVC_INSTALL_DIR
+
+cd $TVC_BUILD_DIR
+wget --no-clobber updates.iontorrent.com/updates/software/external/armadillo-4.600.1.tar.gz
+tar xvzf armadillo-4.600.1.tar.gz
+cd armadillo-4.600.1/
+sed -i 's:^// #define ARMA_USE_LAPACK$:#define ARMA_USE_LAPACK:g' include/armadillo_bits/config.hpp
+sed -i 's:^// #define ARMA_USE_BLAS$:#define ARMA_USE_BLAS:g'     include/armadillo_bits/config.hpp
+cmake .
+make -j4
+
+cd $TVC_BUILD_DIR
+wget --no-clobber updates.iontorrent.com/updates/software/external/bamtools-2.4.0.20150702+git15eadb925f.tar.gz
+tar xvzf bamtools-2.4.0.20150702+git15eadb925f.tar.gz
+mkdir bamtools-2.4.0.20150702+git15eadb925f-build
+cd bamtools-2.4.0.20150702+git15eadb925f-build
+cmake ../bamtools-2.4.0.20150702+git15eadb925f -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
+make -j4
+
+cd $TVC_BUILD_DIR
+wget --no-clobber --no-check-certificate https://github.com/samtools/htslib/archive/1.2.1.tar.gz -O htslib-1.2.1.tar.gz
+tar xvzf htslib-1.2.1.tar.gz
+ln -s htslib-1.2.1 htslib # for samtools
+cd htslib-1.2.1
+make -j4
+
+cd $TVC_BUILD_DIR
+wget --no-clobber --no-check-certificate https://github.com/samtools/samtools/archive/1.2.tar.gz -O samtools-1.2.tar.gz
+tar xvzf samtools-1.2.tar.gz
+cd samtools-1.2
+make -j4
+mkdir $TVC_INSTALL_DIR/bin
+cp samtools $TVC_INSTALL_DIR/bin/
+
+cd $TVC_BUILD_DIR
+mkdir TVC-build
+cd TVC-build
+cmake $TVC_SOURCE_DIR -DCMAKE_INSTALL_PREFIX:PATH=$TVC_INSTALL_DIR -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
+make -j4 install
+
+*** quick test ***
+TVC_ROOT_DIR=$TVC_INSTALL_DIR
+$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
+    --input-bam       $TVC_ROOT_DIR/share/TVC/examples/example1/test.bam \
+    --reference-fasta $TVC_ROOT_DIR/share/TVC/examples/example1/reference.fasta \
+    --region-bed      $TVC_ROOT_DIR/share/TVC/examples/example1/test_merged_plain.bed \
+    --primer-trim-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_unmerged_detail.bed
+    
+*** export PATH, following tools are required: samtools zip tvc ***
+export PATH=$PATH:$TVC_ROOT_DIR/bin
+```
+
+
+### Get reference genome hg19 from Ion Torrent, Size: 868170684 (828M)
+
+##### Download zip archive
+```
+$ wget http://ionupdates.com/reference/hg19.zip
+```
+
+##### Unpack zip archive
+```
+$ unzip hg19.zip
+```
+
+##### Create hg19.fasta.fai
+```
+$ samtools faidx hg19.fasta    
+```
+
+##### Content
+```
+$ md5sum hg19.zip hg19.fasta hg19.fasta.fai
+21afdf14b6b3734e0d25157e60bdfb24  hg19.zip
+d6851f9f4537ff4e9beb5b7a08b89230  hg19.fasta
+cdcd61262b6f4af8e8ac6c2be88a161a  hg19.fasta.fai
+
+$ ls -l
+-rw-r--r-- 1 ionadmin ionadmin 3147289017 Mar 22  2011 hg19.fasta
+-rw-rw-r-- 1 ionadmin ionadmin        788 Nov 18 16:18 hg19.fasta.fai
+-rw-rw-r-- 1 ionadmin ionadmin  868170684 Jul  8  2014 hg19.zip
+```
+
+### Examples
+
+#### 316 - H. sapiens - AmpliSeq BRCA1/BRCA2 Community Panel
+##### https://ioncommunity.thermofisher.com/docs/DOC-7515
+##### CN:TorrentServer/BRCA   PL:IONTORRENT   PU:PGM/316D/IonXpress_009
+```
+wget http://ion-torrent.s3.amazonaws.com/pgm/BRCArun94/BRCArun94IonXpress_009_rawlib.bam          # 163535738 156M
+wget http://ion-torrent.s3.amazonaws.com/pgm/BRCArun94/BRCArun94IonXpress_009_rawlib.bam.bai      #   1675968 1.6M
+wget http://ion-torrent.s3.amazonaws.com/pgm/BRCArun94/BRCArun94TSVC_variants_IonXpress_009.vcf   #     85850 84K
+
+variant_caller_pipeline.py \
+ --input-bam BRCArun94IonXpress_009_rawlib.bam \
+ --reference-fasta hg19.fasta \
+ --parameters-file=/usr/share/TVC/pluginMedia/parameter_sets/ampliseq_somatic_lowstringency_pgm_parameters.json \
+ --generate-gvcf=on \
+ --num-threads=4
+```
+
+
+#### Alternative: Get reference genome hg19 with TMAP index files from Ion Torrent, Size: 4613331358 (4.3G)
+
+```
+$ wget http://ionupdates.com/reference_downloads/hg19.zip
+$ unzip hg19.zip
+```
+
+##### Content
+```
+$ md5sum *
+d2dacce998d72687ca1f2dd28ce2280e  CreateSequenceDictionary.log
+d264f416c20f7d144d74924ab7c356a0  hg19.dict
+d6851f9f4537ff4e9beb5b7a08b89230  hg19.fasta
+cdcd61262b6f4af8e8ac6c2be88a161a  hg19.fasta.fai
+6903f05eef92e5e6f5a6bf890a01d6e6  hg19.fasta.tmap.anno
+f9d7c7dedf322022a0393d99a629229b  hg19.fasta.tmap.bwt
+19c79b65021812df67904f7448367d5f  hg19.fasta.tmap.pac
+ebd201bec6945900ce9f26b62cf89915  hg19.fasta.tmap.sa
+09b3113801b9c6ae69134f8712ef6a8c  hg19.info.txt
+3bc8b68ce604c77f2fecd14ab20a13bc  hg19.md5sum.txt
+3b725f35b395cf763bb7ac2110ab8d1a  hg19.zip
+d41d8cd98f00b204e9800998ecf8427e  samtools.log
+b9ba9df7fc3cd18883e7922bc6dd20d6  tmap.log
+```
+```
+$ ls -l
+total 13218824
+-rwxr-xr-x 1 ionadmin ionadmin        624 May  6  2013 CreateSequenceDictionary.log
+-rwxr-xr-x 1 ionadmin ionadmin       3174 May  6  2013 hg19.dict
+-rwxr-xr-x 1 ionadmin ionadmin 3147289017 May  6  2013 hg19.fasta
+-rwxr-xr-x 1 ionadmin ionadmin        788 May  6  2013 hg19.fasta.fai
+-rwxr-xr-x 1 ionadmin ionadmin       3985 May  6  2013 hg19.fasta.tmap.anno
+-rwxr-xr-x 1 ionadmin ionadmin 3453608024 May  6  2013 hg19.fasta.tmap.bwt
+-rwxr-xr-x 1 ionadmin ionadmin  773923497 May  6  2013 hg19.fasta.tmap.pac
+-rwxr-xr-x 1 ionadmin ionadmin 1547847008 May  6  2013 hg19.fasta.tmap.sa
+-rwxr-xr-x 1 ionadmin ionadmin         92 May  6  2013 hg19.info.txt
+-rwxr-xr-x 1 ionadmin ionadmin        642 May  6  2013 hg19.md5sum.txt
+-rw-rw-r-- 1 ionadmin ionadmin 4613331358 Jul  8  2014 hg19.zip
+-rwxr-xr-x 1 ionadmin ionadmin          0 May  6  2013 samtools.log
+-rwxr-xr-x 1 ionadmin ionadmin      15170 May  6  2013 tmap.log
+```
+
+
+See also https://ioncommunity.thermofisher.com/community/products/software/torrent-variant-caller


=====================================
README.txt
=====================================
@@ -0,0 +1,198 @@
+Compilation instructions for TVC
+
+# Steps 1 through 11 explain how to compile TVC.
+# Step 12 onward explain how to deploy and run the compiled version on a single computer or cluster.
+
+For more information see:
+http://ioncommunity.lifetechnologies.com/community/products/torrent-variant-caller
+
+# 1. Download source file
+
+wget updates.iontorrent.com/tvc_standalone/tvc-5.0.3.tar.gz
+
+
+# 2. Copy source file into build root directory
+
+TVC_VERSION=tvc-5.0.3
+
+BUILD_ROOT_DIR=`mktemp -d`
+cp $TVC_VERSION.tar.gz $BUILD_ROOT_DIR
+DISTRIBUTION_CODENAME=`lsb_release -is`_`lsb_release -rs`_`uname -m`
+TVC_INSTALL_DIR=$BUILD_ROOT_DIR/$TVC_VERSION-$DISTRIBUTION_CODENAME-binary
+mkdir -p $TVC_INSTALL_DIR/bin/
+
+# 3. Install dependencies
+
+# 3.1 RedHat/CentOS
+
+yum -y install gcc-c++ cmake zlib-devel bzip2-devel bzip2 \
+ncurses-devel python-simplejson atlas-devel blas-devel lapack-devel redhat-lsb-core boost-devel \
+# perl perl-Data-Dumper # with version 7
+
+
+# 3.2 Debian/Ubuntu
+
+sudo aptitude install g++ cmake zlib1g-dev libbz2-dev libncurses-dev libboost-math-dev \
+libatlas-dev liblapack-dev
+
+
+# 3.3 cmake
+
+# Required is cmake (>=2.8.0), you can check the installed version with e.g.:
+
+#  $ cmake -version
+#  cmake version 2.8.0
+
+# Installation of a newer cmake version is only required if the installed version is older than 2.8.0
+
+# To delete the old cmake package:
+
+# Redhat/CentOS:
+yum -y erase cmake
+
+# Debian/Ubuntu:
+aptitude purge cmake
+
+cd ~
+wget http://www.cmake.org/files/v2.8/cmake-2.8.11.2.tar.gz
+tar xvzf cmake-2.8.11.2.tar.gz
+cd cmake-2.8.11.2
+./configure
+make -j5
+make install
+
+
+# 4. build armadillo
+cd $BUILD_ROOT_DIR
+wget http://updates.iontorrent.com/updates/software/external/armadillo-4.600.1.tar.gz
+tar xvzf armadillo-4.600.1.tar.gz
+cd armadillo-4.600.1/
+sed -i 's:^// #define ARMA_USE_LAPACK$:#define ARMA_USE_LAPACK:g' include/armadillo_bits/config.hpp
+sed -i 's:^// #define ARMA_USE_BLAS$:#define ARMA_USE_BLAS:g'     include/armadillo_bits/config.hpp
+cmake .
+make -j4
+
+
+# 5. build bamtools
+cd $BUILD_ROOT_DIR
+wget updates.iontorrent.com/updates/software/external/bamtools-2.4.0.20150702+git15eadb925f.tar.gz
+tar xvzf bamtools-2.4.0.20150702+git15eadb925f.tar.gz
+mkdir bamtools-2.4.0.20150702+git15eadb925f-build
+cd bamtools-2.4.0.20150702+git15eadb925f-build
+cmake ../bamtools-2.4.0.20150702+git15eadb925f -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
+make -j4
+
+
+# 7. build htslib
+cd $BUILD_ROOT_DIR
+wget --no-check-certificate https://github.com/samtools/htslib/archive/1.2.1.tar.gz -O htslib-1.2.1.tar.gz
+tar xvzf htslib-1.2.1.tar.gz
+ln -s htslib-1.2.1 htslib # for samtools
+cd htslib-1.2.1
+make -j4
+
+
+# 8. build samtools
+cd $BUILD_ROOT_DIR
+wget --no-check-certificate https://github.com/samtools/samtools/archive/1.2.tar.gz -O samtools-1.2.tar.gz
+tar xvzf samtools-1.2.tar.gz
+cd samtools-1.2
+make -j4
+cp samtools $TVC_INSTALL_DIR/bin/
+
+
+# 10. build TVC
+cd $BUILD_ROOT_DIR
+tar xvzf $TVC_VERSION.tar.gz
+TVC_SOURCE_DIR=$BUILD_ROOT_DIR/$TVC_VERSION
+mkdir $TVC_VERSION-build
+cd $TVC_VERSION-build
+cmake $TVC_SOURCE_DIR -DCMAKE_INSTALL_PREFIX:PATH=$TVC_INSTALL_DIR -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo
+make -j4 install
+
+# 11. create binary package
+tar cvzf $TVC_VERSION-$DISTRIBUTION_CODENAME-binary.tar.gz -C $BUILD_ROOT_DIR $TVC_VERSION-$DISTRIBUTION_CODENAME-binary
+
+
+######################################################################################
+
+# 12.1 Either use the TVC version from the (temporary) TVC_INSTALL_DIR directory
+
+TVC_ROOT_DIR=$TVC_INSTALL_DIR
+
+
+# 12.2 or use the TVC binary package
+
+tar xvzf $TVC_VERSION-$DISTRIBUTION_CODENAME-binary.tar.gz
+TVC_ROOT_DIR=`pwd`/$TVC_VERSION-$DISTRIBUTION_CODENAME-binary
+
+
+# 13. export PATH, following tools are required: samtools zip tvc
+export PATH=$PATH:$TVC_ROOT_DIR/bin
+
+
+# 14. adjust some file paths and invoke TVC
+
+# Required are 1 reference, 2 bed files, 1 aligned bam file, and 1 tvc parameter file
+
+# Example 1:
+
+$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
+    --input-bam       $TVC_ROOT_DIR/share/TVC/examples/example1/test.bam \
+    --reference-fasta $TVC_ROOT_DIR/share/TVC/examples/example1/reference.fasta \
+    --region-bed      $TVC_ROOT_DIR/share/TVC/examples/example1/test_merged_plain.bed \
+    --primer-trim-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_unmerged_detail.bed
+
+# Example 1 with specified parameter file and output directory:
+
+$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
+    --input-bam       $TVC_ROOT_DIR/share/TVC/examples/example1/test.bam \
+    --reference-fasta $TVC_ROOT_DIR/share/TVC/examples/example1/reference.fasta \
+    --region-bed      $TVC_ROOT_DIR/share/TVC/examples/example1/test_merged_plain.bed \
+    --primer-trim-bed $TVC_ROOT_DIR/share/TVC/examples/example1/test_unmerged_detail.bed \
+    --parameters-file $TVC_ROOT_DIR/share/TVC/pluginMedia/parameter_sets/ampliseq_somatic_lowstringency_pgm_parameters.json \
+    --output-dir      /tmp/tvc_example1
+
+
+# Example 2 (not included yet) :
+
+# Required are 1 reference, 2 bed files, 1 aligned bam file, and 1 tvc parameter file
+                REF=/mnt/TS/source/hg19/hg19.fasta
+BED_UNMERGED_DETAIL=/mnt/TS/source/tvc_test_GB1-118-CSI/unmerged/detail/Exome_draft_Designed_20130531.bed
+   BED_MERGED_PLAIN=/mnt/TS/source/tvc_test_GB1-118-CSI/merged/plain/Exome_draft_Designed_20130531.bed
+                BAM=/mnt/TS/source/tvc_test_GB1-118-CSI/IonXpress_045_rawlib.bam
+          TVC_PARAM=$TVC_ROOT_DIR/share/TVC/pluginMedia/configs/germline_low_stringency_proton.json
+
+TMP_DIR=`mktemp -d`
+
+$TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
+    --parameters-file $TVC_PARAM \
+    --input-bam $BAM \
+    --reference-fasta $REF \
+    --region-bed $BED_MERGED_PLAIN \
+    --primer-trim-bed $BED_UNMERGED_DETAIL \
+    --postprocessed-bam $TMP_DIR/trimmed.bam \
+    --output-dir $TMP_DIR
+
+
+# Example 3 (hotspot) (not included yet):
+
+                REF=/mnt/TS/source/hg19/hg19.fasta
+BED_UNMERGED_DETAIL=/mnt/TS/source/tvc_test_Z06-506-CCP/unmerged_detail_CCP.20131001.designed.bed
+   BED_MERGED_PLAIN=/mnt/TS/source/tvc_test_Z06-506-CCP/merged_plain_CCP.20131001.designed.bed
+                BAM=/mnt/TS/source/tvc_test_Z06-506-CCP/IonXpress_019_rawlib.bam
+          TVC_PARAM=/mnt/TS/source/tvc_test_Z06-506-CCP/local_parameters.json
+            HOTSPOT=/mnt/TS/source/tvc_test_Z06-506-CCP/hotspot.vcf
+
+TMP_DIR=`mktemp -d`
+
+time $TVC_ROOT_DIR/bin/variant_caller_pipeline.py \
+    --parameters-file $TVC_PARAM \
+    --input-bam $BAM \
+    --reference-fasta $REF \
+    --region-bed $BED_MERGED_PLAIN \
+    --primer-trim-bed $BED_UNMERGED_DETAIL \
+    --hotspot-vcf $HOTSPOT \
+    --postprocessed-bam $TMP_DIR/trimmed.bam \
+    --output-dir $TMP_DIR
+


=====================================
debian/changelog
=====================================
@@ -1,9 +1,22 @@
-tvc (5.0.3+dfsg-3) UNRELEASED; urgency=medium
+tvc (5.0.3+git20151221.80e144e+dfsg-1) unstable; urgency=medium
 
+  [ Steffen Moeller ]
   * Added notion of lacking entries in research software
     registries in debian/upstream/metadata
 
- -- Steffen Moeller <moeller at debian.org>  Thu, 14 Sep 2017 09:39:02 +0200
+  [ Andreas Tille ]
+  * Add watch file using Git mode
+  * Remove code copy of libsmithwaterman, mention all other files that
+    were previously removed in d/copyright
+  * Use Debian packaged libsmithwaterman
+  * hardening=+all
+  * debhelper 11
+  * Point Vcs fields to salsa.debian.org
+  * Standards-Version: 4.2.1
+  * Ignore script-with-language-extension as per consensus on Debian Med
+    list
+
+ -- Andreas Tille <tille at debian.org>  Thu, 27 Sep 2018 10:09:12 +0200
 
 tvc (5.0.3+dfsg-2) experimental; urgency=medium
 


=====================================
debian/compat
=====================================
@@ -1 +1 @@
-10
+11


=====================================
debian/control
=====================================
@@ -4,15 +4,16 @@ Uploaders: Dominique Belhachemi <domibel at debian.org>,
            Andreas Tille <tille at debian.org>
 Section: science
 Priority: optional
-Build-Depends: debhelper (>= 10),
+Build-Depends: debhelper (>= 11~),
                cmake,
                libbamtools-dev,
                libjsoncpp-dev,
                libarmadillo-dev,
-               libboost-math-dev
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/tvc.git
-Vcs-Git: https://anonscm.debian.org/git/debian-med/tvc.git
+               libboost-math-dev,
+               libsmithwaterman-dev
+Standards-Version: 4.2.1
+Vcs-Browser: https://salsa.debian.org/med-team/tvc
+Vcs-Git: https://salsa.debian.org/med-team/tvc.git
 Homepage: http://ioncommunity.thermofisher.com
 
 Package: tvc


=====================================
debian/copyright
=====================================
@@ -1,10 +1,42 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 Upstream-Name: tvc
-Source: https://github.com/iontorrent
-        http://updates.iontorrent.com/tvc_standalone/ (Dominique Belhachemi <domibel at debian.org>, Fri, 13 Jan 2017 10:11:18 -0500)
+Source: https://github.com/domibel/IonTorrent-VariantCaller
+Files-Excluded: */smithwaterman
+                */jsoncpp-src*
+                */.gitignore
+                */tabixpp/perl
+                */tabixpp/python
+                */tabixpp/Makefile
+                */tabixpp/*.java
+                */tabixpp/example*
+                */tabixpp/*.py
+                */tabixpp/*.tex
+                */pluginMedia/configs
+                */pluginMedia/hotspots
+                */parameter_sets/4_*
+                */parameter_sets/builtin_parameter_sets.json
+                */parameter_sets/parameter_sets.json
+                */parameter_sets/*.py
 
 Files: *
 Copyright: 2010-2014 Ion Torrent Systems, Inc.
+License: GPL-2+
+
+Files: external/freebayes/*
+Copyright: 2010 Erik Garrison, Gabor Marth
+License: MIT
+
+Files: external/vcflib/*
+Copyright: 2008-2009 Genome Research Ltd (GRL)
+           2008-2010 Broad Institute / Massachusetts Institute of Technology
+           2010-2012 Erik Garrison
+License: MIT
+
+Files: debian/*
+Copyright: 2015 Dominique Belhachemi <domibel at debian.org>
+           2017-2018 Andreas Tille <tille at debian.org>
+License: GPL-2+
+
 License: GPL-2+
  This package is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,39 +48,9 @@ License: GPL-2+
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  .
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>
- .
  On Debian systems, the complete text of the GNU General
  Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
 
-
-Files: external/freebayes/*
-Copyright: 2010 Erik Garrison, Gabor Marth
-License: MIT
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- .
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- .
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-
-
-Files: external/vcflib/*
-Copyright: 2008-2009 Genome Research Ltd (GRL)
-           2008-2010 Broad Institute / Massachusetts Institute of Technology
-           2010-2012 Erik Garrison
 License: MIT
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -67,23 +69,3 @@ License: MIT
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
-
-
-Files: debian/*
-Copyright: 2015 Dominique Belhachemi <domibel at debian.org>
-License: GPL-2+
- This package is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- .
- This package is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>
- .
- On Debian systems, the complete text of the GNU General
- Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".


=====================================
debian/lintian-overrides
=====================================
@@ -0,0 +1,2 @@
+# see https://lists.debian.org/debian-med/2018/06/msg00043.html
+tvc: script-with-language-extension usr/bin/variant_caller_pipeline.py


=====================================
debian/patches/series
=====================================
@@ -3,3 +3,5 @@ bamtools.patch
 help.patch
 hurd.patch
 sse.patch
+use_debian_packaged_smithwaterman.patch
+# use_debian_packaged_vcflib.patch


=====================================
debian/patches/use_debian_packaged_smithwaterman.patch
=====================================
@@ -0,0 +1,42 @@
+Description: Use Debian packaged libsmithwaterman
+Author: Andreas Tille <tille at debian.org>
+Last-Update: Thu, 27 Sep 2018 08:59:37 +0200
+
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -85,10 +85,6 @@ set(tvcSRCS
+   ${ION_VCFLIB_DIR}/tabixpp/tabix.cpp
+   ${ION_VCFLIB_DIR}/tabixpp/index.c
+   ${ION_VCFLIB_DIR}/tabixpp/bgzf.c
+-  ${ION_VCFLIB_DIR}/smithwaterman/LeftAlign.cpp
+-  ${ION_VCFLIB_DIR}/smithwaterman/Repeats.cpp
+-  ${ION_VCFLIB_DIR}/smithwaterman/IndelAllele.cpp
+-  ${ION_VCFLIB_DIR}/smithwaterman/SmithWatermanGotoh.cpp
+   ${ION_FREEBAYES_DIR}/src/AlleleParser.cpp
+ 
+   BaseCaller/PIDloop.cpp
+@@ -108,7 +104,7 @@ if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL
+ endif()
+ 
+ add_executable(tvc ${tvcSRCS})
+-target_link_libraries(tvc bamtools z pthread blas lapack armadillo jsoncpp)
++target_link_libraries(tvc bamtools z pthread blas lapack armadillo jsoncpp disorder smithwaterman)
+ 
+ 
+ add_executable(tvcassembly
+@@ -135,14 +131,10 @@ add_executable(tvcutils
+   ${ION_VCFLIB_DIR}/tabixpp/tabix.cpp
+   ${ION_VCFLIB_DIR}/tabixpp/index.c
+   ${ION_VCFLIB_DIR}/tabixpp/bgzf.c
+-  ${ION_VCFLIB_DIR}/smithwaterman/LeftAlign.cpp
+-  ${ION_VCFLIB_DIR}/smithwaterman/Repeats.cpp
+-  ${ION_VCFLIB_DIR}/smithwaterman/IndelAllele.cpp
+-  ${ION_VCFLIB_DIR}/smithwaterman/SmithWatermanGotoh.cpp
+   ${PROJECT_BINARY_DIR}/IonVersion.cpp
+ )
+ 
+-target_link_libraries(tvcutils bamtools z jsoncpp)
++target_link_libraries(tvcutils bamtools z jsoncpp disorder smithwaterman)
+ 
+ install(TARGETS   tvc                                                 DESTINATION bin)
+ install(TARGETS   tvcassembly                                         DESTINATION bin)


=====================================
debian/rules
=====================================
@@ -6,7 +6,7 @@ DPKG_EXPORT_BUILDFLAGS = 1
 include /usr/share/dpkg/default.mk
 
 # see FEATURE AREAS in dpkg-buildflags(1)
-#export DEB_BUILD_MAINT_OPTIONS = hardening=+all
+export DEB_BUILD_MAINT_OPTIONS = hardening=+all
 
 # see ENVIRONMENT in dpkg-buildflags(1)
 # package maintainers to append CFLAGS


=====================================
debian/watch
=====================================
@@ -0,0 +1,7 @@
+version=4
+
+opts="mode=git,pretty=5.0.3+git%cd.%h,repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
+    https://github.com/domibel/IonTorrent-VariantCaller.git HEAD
+
+# This is not really maintained legacy code but useful anyway
+# It is not to be expected that upstream will set any tags


=====================================
external/vcflib/smithwaterman/BandedSmithWaterman.cpp deleted
=====================================
@@ -1,670 +0,0 @@
-#include "BandedSmithWaterman.h"
-
-// define our static constants
-const float CBandedSmithWaterman::FLOAT_NEGATIVE_INFINITY = (float)-1e+30;
-
-const DirectionType CBandedSmithWaterman::Directions_STOP     = 0;
-const DirectionType CBandedSmithWaterman::Directions_LEFT     = 1;
-const DirectionType CBandedSmithWaterman::Directions_DIAGONAL = 2;
-const DirectionType CBandedSmithWaterman::Directions_UP       = 3;
-
-const PositionType CBandedSmithWaterman::Position_REF_AND_QUERY_ZERO    = 0;
-const PositionType CBandedSmithWaterman::Position_REF_ZERO              = 1;
-const PositionType CBandedSmithWaterman::Position_QUERY_ZERO            = 2;
-const PositionType CBandedSmithWaterman::Position_REF_AND_QUERO_NONZERO = 3;
-
-// constructor
-CBandedSmithWaterman::CBandedSmithWaterman(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty, unsigned int bandWidth) 
-: mCurrentMatrixSize(0)
-, mCurrentAnchorSize(0)
-, mCurrentAQSumSize(0)
-, mBandwidth(bandWidth)
-, mPointers(NULL)
-, mMatchScore(matchScore)
-, mMismatchScore(mismatchScore)
-, mGapOpenPenalty(gapOpenPenalty)
-, mGapExtendPenalty(gapExtendPenalty)
-, mAnchorGapScores(NULL)
-, mBestScores(NULL)
-, mReversedAnchor(NULL)
-, mReversedQuery(NULL)
-, mUseHomoPolymerGapOpenPenalty(false)
-{
-	CreateScoringMatrix();
-
-	//if((bandWidth % 2) != 1) {
-		//printf("ERROR: The bandwidth must be an odd number.\n");
-		//exit(1);
-	//}
-
-	try {
-		mBestScores	 = new float[bandWidth + 2];
-		mAnchorGapScores = new float[bandWidth + 2];
-	} catch(bad_alloc) {
-		printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
-		exit(1);
-	}
-}
-
-// destructor
-CBandedSmithWaterman::~CBandedSmithWaterman(void) {
-	if(mPointers)              delete [] mPointers;
-	if(mAnchorGapScores)       delete [] mAnchorGapScores;
-	if(mBestScores)            delete [] mBestScores;
-	if(mReversedAnchor)        delete [] mReversedAnchor;
-	if(mReversedQuery)         delete [] mReversedQuery;
-}
-
-// aligns the query sequence to the anchor using the Smith Waterman Gotoh algorithm
-void CBandedSmithWaterman::Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr) {
-
-
-	
-	unsigned int rowStart = min(hr.first.first, (unsigned int)hr.second.first);
-	hr.first.first    -= rowStart;
-	hr.second.first   -= rowStart;
-	
-	//bool isLegalBandWidth = (s2.length() - hr.QueryBegin) > (mBandwidth / 2);
-	//     isLegalBandWidth = isLegalBandWidth && ((s1.length() - hr.Begin) > (mBandwidth / 2));
-
-
-
-	// check the lengths of the input sequences
-	//if( (s1.length() <= 0) || (s2.length() <= 0) || (s1.length() < s2.length()) ) {
-	//	printf("ERROR: An unexpected sequence length was encountered during pairwise alignment.\n");
-	//	printf("Sequence lengths are listed as following:\n");
-	//	printf("1. Reference length: %u\n2. Query length: %u\n", s1.length(), s2.length());
-		//printf("3. Hash region in reference:%4u-%4u\n", hr.Begin + rowStart, hr.End);
-		//printf("4. Hash region in query:    %4u-%4u\n", hr.QueryBegin + rowStart, hr.QueryEnd);
-	//	exit(1);
-	//}
-
-	
-	// determine the hash region type
-	unsigned int rowOffset;
-	unsigned int columnOffset;
-	PositionType positionType;
-
-	if(hr.first.first == 0) {
-		if(hr.second.first == 0) {
-			rowOffset    = 1;
-			columnOffset = (mBandwidth / 2) + 1;
-			positionType = Position_REF_AND_QUERY_ZERO;
-		} else {
-			rowOffset    = 1 - hr.second.first;
-			columnOffset = (mBandwidth / 2) + 1 + hr.second.first;
-			positionType = Position_REF_ZERO;
-		}
-	} else {
-		if(hr.second.first == 0) {
-			rowOffset    = 1;
-			columnOffset = (mBandwidth / 2) + 1 - hr.first.first;
-			positionType = Position_QUERY_ZERO;
-		} else {
-			rowOffset    = 1 - hr.second.first;
-			columnOffset = (mBandwidth / 2) + 1 + hr.second.first - hr.first.first;
-			positionType = Position_REF_AND_QUERO_NONZERO;
-		}
-	}
-
-	// =========================
-	// Reinitialize the matrices
-	// =========================
-	
-	ReinitializeMatrices(positionType, s1.length(), s2.length(), hr);
-
-	// =======================================
-	// Banded Smith-Waterman forward algorithm
-	// =======================================
-
-	unsigned int bestColumn	= 0;
-	unsigned int bestRow	= 0;
-	float bestScore         = FLOAT_NEGATIVE_INFINITY;
-	float currentQueryGapScore;
-
-	// rowNum and column indicate the row and column numbers in the Smith-Waterman matrix respectively
-	unsigned int rowNum    = hr.second.first;
-	unsigned int columnNum = hr.first.first;
-
-	// indicates how many rows including blank elements in the Banded SmithWaterman
-	int numBlankElements = (mBandwidth / 2) - columnNum;
-
-	//cout << numBlankElements << endl;
-	// upper triangle matrix in Banded Smith-Waterman
-	for( ; numBlankElements > 0; numBlankElements--, rowNum++){
-		// in the upper triangle matrix, we always start at the 0th column
-		columnNum = 0;
-
-		// columnEnd indicates how many columns which should be dealt with in the current row
-		unsigned int columnEnd = min((mBandwidth - numBlankElements), ((unsigned int) s1.length() - columnNum + 1) );
-		currentQueryGapScore = FLOAT_NEGATIVE_INFINITY;
-		for( unsigned int j = 0; j < columnEnd; j++){
-			float score = CalculateScore(s1, s2, rowNum, columnNum, currentQueryGapScore, rowOffset, columnOffset);
-			//cout << s1[columnNum] << s2[rowNum] << score << endl;
-			UpdateBestScore(bestRow, bestColumn, bestScore, rowNum, columnNum, score);
-			columnNum++;
-		}
-
-		// replace the columnNum to the middle column in the Smith-Waterman matrix
-		columnNum = columnNum - (mBandwidth / 2);
-	}
-	// complete matrix in Banded Smith-Waterman
-	unsigned int completeNum = min((s1.length() - columnNum - (mBandwidth / 2)), (s2.length() - rowNum));
-	//cout << completeNum << endl;
-	for(unsigned int i = 0; i < completeNum; i++, rowNum++){
-		columnNum = columnNum - (mBandwidth / 2);
-
-		// there are mBandwidth columns which should be dealt with in each row
-		currentQueryGapScore = FLOAT_NEGATIVE_INFINITY;
-
-		for(unsigned int j = 0; j < mBandwidth; j++){
-			float score = CalculateScore(s1, s2, rowNum, columnNum, currentQueryGapScore, rowOffset, columnOffset);
-			UpdateBestScore(bestRow, bestColumn, bestScore, rowNum, columnNum, score);
-			//cout << s1[columnNum] << s2[rowNum] << score << endl;
-			columnNum++;
-		}
-
-		// replace the columnNum to the middle column in the Smith-Waterman matrix
-		// because mBandwidth is an odd number, everytime the following equation shifts a column (pluses 1).
-		columnNum = columnNum - (mBandwidth / 2);
-	}
-	
-	// lower triangle matrix
-	numBlankElements = min(mBandwidth, ((unsigned int) s2.length() - rowNum));
-	columnNum = columnNum - (mBandwidth / 2);
-	for(unsigned int i = 0; numBlankElements > 0; i++, rowNum++, numBlankElements--) {
-
-		mBestScores[ mBandwidth - i ] = FLOAT_NEGATIVE_INFINITY;;
-		// columnEnd indicates how many columns which should be dealt with
-		currentQueryGapScore = FLOAT_NEGATIVE_INFINITY;
-
-		for( unsigned int j = columnNum; j < s1.length(); j++){
-			float score = CalculateScore(s1, s2, rowNum, columnNum, currentQueryGapScore, rowOffset, columnOffset);
-			UpdateBestScore(bestRow, bestColumn, bestScore, rowNum, columnNum, score);
-			//cout << s1[columnNum] << s2[rowNum] << score << endl;
-			columnNum++;
-		}
-
-		// replace the columnNum to the middle column in the Smith-Waterman matrix
-		columnNum = columnNum - mBandwidth + i + 2;
-	}
-
-	// =========================================
-	// Banded Smith-Waterman backtrace algorithm
-	// =========================================
-
-	Traceback(referenceAl, cigarAl, s1, s2, bestRow, bestColumn, rowOffset, columnOffset);
-
-}
-
-// calculates the score during the forward algorithm
-float CBandedSmithWaterman::CalculateScore(const string& s1, const string& s2, const unsigned int rowNum, const unsigned int columnNum, float& currentQueryGapScore, const unsigned int rowOffset, const unsigned int columnOffset) {
-
-	// initialize
-	const unsigned int row      = rowNum + rowOffset;
-	const unsigned int column   = columnOffset - rowNum + columnNum;
-	const unsigned int position = row * (mBandwidth + 2) + column;
-
-	// retrieve the similarity scores
-	const float similarityScore      = mScoringMatrix[s1[columnNum] - 'A'][s2[rowNum] - 'A'];
-	const float totalSimilarityScore = mBestScores[column] + similarityScore;
-
-	// ================================
-	// open a gap in the query sequence
-	// ================================
-
-	float queryGapExtendScore = currentQueryGapScore - mGapExtendPenalty;
-	float queryGapOpenScore   = mBestScores[column - 1] - mGapOpenPenalty;
-
-	// compute the homo-polymer gap score if enabled
-	if(mUseHomoPolymerGapOpenPenalty)
-		if((rowNum > 1) && (s2[rowNum] == s2[rowNum - 1]))
-			queryGapOpenScore = mBestScores[column - 1] - mHomoPolymerGapOpenPenalty;
-
-	if(queryGapExtendScore > queryGapOpenScore) {
-		currentQueryGapScore = queryGapExtendScore;
-		mPointers[position].mSizeOfHorizontalGaps = mPointers[position - 1].mSizeOfHorizontalGaps + 1;
-	} else currentQueryGapScore = queryGapOpenScore;
-
-
-	// ====================================
-	// open a gap in the reference sequence
-	// ====================================
-
-	
-	float anchorGapExtendScore = mAnchorGapScores[column + 1] - mGapExtendPenalty;
-	float anchorGapOpenScore   = mBestScores[column + 1] - mGapOpenPenalty;
-
-	// compute the homo-polymer gap score if enabled	
-	if(mUseHomoPolymerGapOpenPenalty)
-		if((columnNum > 1) && (s1[columnNum] == s1[columnNum - 1]))
-			anchorGapOpenScore = mBestScores[column + 1] - mHomoPolymerGapOpenPenalty;
-
-	if(anchorGapExtendScore > anchorGapOpenScore) {
-		mAnchorGapScores[column] = anchorGapExtendScore;
-		mPointers[position].mSizeOfVerticalGaps = mPointers[position - mBandwidth - 1].mSizeOfVerticalGaps + 1;
-	} else mAnchorGapScores[column] = anchorGapOpenScore;
-
-	// ======================================
-	// calculate the best score and direction
-	// ======================================
-
-	//mBestScores[column] = MaxFloats(totalSimilarityScore, mAnchorGapScores[column], currentQueryGapScore);
-	mBestScores[column] = MaxFloats(totalSimilarityScore, currentQueryGapScore, mAnchorGapScores[column]);
-
-	// determine the traceback direction
-	// diagonal (445364713) > stop (238960195) > up (214378647) > left (166504495)
-	if(mBestScores[column] == 0)                            mPointers[position].Direction = Directions_STOP;
-	else if(mBestScores[column] == totalSimilarityScore)    mPointers[position].Direction = Directions_UP;
-	else if(mBestScores[column] == currentQueryGapScore)   mPointers[position].Direction = Directions_LEFT;
-	else                                                    mPointers[position].Direction = Directions_DIAGONAL;
-
-	return mBestScores[column];
-}
-
-// corrects the homopolymer gap order for forward alignments
-void CBandedSmithWaterman::CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches) {
-
-
-	// this is only required for alignments with mismatches
-	//if(al.NumMismatches == 0) return;
-	if ( numMismatches == 0 ) return;
-
-	// localize the alignment data
-	//char* pReference = al.Reference.Data();
-	//char* pQuery     = al.Query.Data();
-	//const unsigned int numBases = al.Reference.Length();
-	char* pReference = mReversedAnchor;
-	char* pQuery     = mReversedQuery;
-
-	// initialize
-	bool hasReferenceGap = false, hasQueryGap = false;
-	char* pNonGapSeq = NULL;
-	char* pGapSeq    = NULL;
-	char nonGapBase  = 'J';
-
-	// identify gapped regions
-	for(unsigned int i = 0; i < numBases; i++) {
-
-		// check if the current position is gapped
-		hasReferenceGap = false;
-		hasQueryGap     = false;
-
-		if(pReference[i] == GAP) {
-			hasReferenceGap = true;
-			pNonGapSeq      = pQuery;
-			pGapSeq         = pReference;
-			nonGapBase      = pQuery[i];
-		}
-
-		if(pQuery[i] == GAP) {
-			hasQueryGap = true;
-			pNonGapSeq  = pReference;
-			pGapSeq     = pQuery;
-			nonGapBase  = pReference[i];
-		}
-
-		// continue if we don't have any gaps
-		if(!hasReferenceGap && !hasQueryGap) continue;
-
-		// sanity check
-		if(hasReferenceGap && hasQueryGap) {
-			printf("ERROR: Found a gap in both the reference sequence and query sequence.\n");
-			exit(1);
-		}
-
-		// find the non-gapped length (forward)
-		unsigned short numGappedBases = 0;
-		unsigned short nonGapLength   = 0;
-		unsigned short testPos = i;
-		while(testPos < numBases) {
-
-			const char gs  = pGapSeq[testPos];
-			const char ngs = pNonGapSeq[testPos];
-
-			bool isPartofHomopolymer = false;
-			if(((gs == nonGapBase) || (gs == GAP)) && (ngs == nonGapBase)) isPartofHomopolymer = true;
-			if(!isPartofHomopolymer) break;
-
-			if(gs == GAP) numGappedBases++;
-			else nonGapLength++;
-			testPos++;
-		}
-
-		// fix the gap order
-		if(numGappedBases != 0) {
-			char* pCurrentSequence = pGapSeq + i;
-			memset(pCurrentSequence, nonGapBase, nonGapLength);
-			pCurrentSequence += nonGapLength;
-			memset(pCurrentSequence, GAP, numGappedBases);
-		}
-
-		// increment
-		i += numGappedBases + nonGapLength - 1;
-	}
-}
-
-// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
-void CBandedSmithWaterman::CreateScoringMatrix(void) {
-
-	unsigned int nIndex = 13;
-	unsigned int xIndex = 23;
-
-	// define the N score to be 1/4 of the span between mismatch and match
-	//const short nScore = mMismatchScore + (short)(((mMatchScore - mMismatchScore) / 4.0) + 0.5);
-
-	// calculate the scoring matrix
-	for(unsigned char i = 0; i < MOSAIK_NUM_NUCLEOTIDES; i++) {
-		for(unsigned char j = 0; j < MOSAIK_NUM_NUCLEOTIDES; j++) {
-
-			// N.B. matching N to everything (while conceptually correct) leads to some
-			// bad alignments, lets make N be a mismatch instead.
-
-			// add the matches or mismatches to the hashtable (N is a mismatch)
-			if((i == nIndex) || (j == nIndex)) mScoringMatrix[i][j] = mMismatchScore;
-			else if((i == xIndex) || (j == xIndex)) mScoringMatrix[i][j] = mMismatchScore;
-			else if(i == j) mScoringMatrix[i][j] = mMatchScore;
-			else mScoringMatrix[i][j] = mMismatchScore;
-		}
-	}
-
-	// add ambiguity codes
-	mScoringMatrix['M' - 'A']['A' - 'A'] = mMatchScore;	// M - A
-	mScoringMatrix['A' - 'A']['M' - 'A'] = mMatchScore;
-	// add ambiguity codes
-	mScoringMatrix['M' - 'A']['A' - 'A'] = mMatchScore;	// M - A
-	mScoringMatrix['A' - 'A']['M' - 'A'] = mMatchScore;
-	mScoringMatrix['M' - 'A']['C' - 'A'] = mMatchScore; // M - C
-	mScoringMatrix['C' - 'A']['M' - 'A'] = mMatchScore;
-
-	mScoringMatrix['R' - 'A']['A' - 'A'] = mMatchScore;	// R - A
-	mScoringMatrix['A' - 'A']['R' - 'A'] = mMatchScore;
-	mScoringMatrix['R' - 'A']['G' - 'A'] = mMatchScore; // R - G
-	mScoringMatrix['G' - 'A']['R' - 'A'] = mMatchScore;
-
-	mScoringMatrix['W' - 'A']['A' - 'A'] = mMatchScore;	// W - A
-	mScoringMatrix['A' - 'A']['W' - 'A'] = mMatchScore;
-	mScoringMatrix['W' - 'A']['T' - 'A'] = mMatchScore; // W - T
-	mScoringMatrix['T' - 'A']['W' - 'A'] = mMatchScore;
-
-	mScoringMatrix['S' - 'A']['C' - 'A'] = mMatchScore;	// S - C
-	mScoringMatrix['C' - 'A']['S' - 'A'] = mMatchScore;
-	mScoringMatrix['S' - 'A']['G' - 'A'] = mMatchScore; // S - G
-	mScoringMatrix['G' - 'A']['S' - 'A'] = mMatchScore;
-
-	mScoringMatrix['Y' - 'A']['C' - 'A'] = mMatchScore;	// Y - C
-	mScoringMatrix['C' - 'A']['Y' - 'A'] = mMatchScore;
-	mScoringMatrix['Y' - 'A']['T' - 'A'] = mMatchScore; // Y - T
-	mScoringMatrix['T' - 'A']['Y' - 'A'] = mMatchScore;
-
-	mScoringMatrix['K' - 'A']['G' - 'A'] = mMatchScore;	// K - G
-	mScoringMatrix['G' - 'A']['K' - 'A'] = mMatchScore;
-	mScoringMatrix['K' - 'A']['T' - 'A'] = mMatchScore; // K - T
-	mScoringMatrix['T' - 'A']['K' - 'A'] = mMatchScore;
-
-	mScoringMatrix['V' - 'A']['A' - 'A'] = mMatchScore;	// V - A
-	mScoringMatrix['A' - 'A']['V' - 'A'] = mMatchScore;
-	mScoringMatrix['V' - 'A']['C' - 'A'] = mMatchScore; // V - C
-	mScoringMatrix['C' - 'A']['V' - 'A'] = mMatchScore;
-	mScoringMatrix['V' - 'A']['G' - 'A'] = mMatchScore; // V - G
-	mScoringMatrix['G' - 'A']['V' - 'A'] = mMatchScore;
-
-	mScoringMatrix['H' - 'A']['A' - 'A'] = mMatchScore;	// H - A
-	mScoringMatrix['A' - 'A']['H' - 'A'] = mMatchScore;
-	mScoringMatrix['H' - 'A']['C' - 'A'] = mMatchScore; // H - C
-	mScoringMatrix['C' - 'A']['H' - 'A'] = mMatchScore;
-	mScoringMatrix['H' - 'A']['T' - 'A'] = mMatchScore; // H - T
-	mScoringMatrix['T' - 'A']['H' - 'A'] = mMatchScore;
-
-	mScoringMatrix['D' - 'A']['A' - 'A'] = mMatchScore;	// D - A
-	mScoringMatrix['A' - 'A']['D' - 'A'] = mMatchScore;
-	mScoringMatrix['D' - 'A']['G' - 'A'] = mMatchScore; // D - G
-	mScoringMatrix['G' - 'A']['D' - 'A'] = mMatchScore;
-	mScoringMatrix['D' - 'A']['T' - 'A'] = mMatchScore; // D - T
-	mScoringMatrix['T' - 'A']['D' - 'A'] = mMatchScore;
-
-	mScoringMatrix['B' - 'A']['C' - 'A'] = mMatchScore;	// B - C
-	mScoringMatrix['C' - 'A']['B' - 'A'] = mMatchScore;
-	mScoringMatrix['B' - 'A']['G' - 'A'] = mMatchScore; // B - G
-	mScoringMatrix['G' - 'A']['B' - 'A'] = mMatchScore;
-	mScoringMatrix['B' - 'A']['T' - 'A'] = mMatchScore; // B - T
-	mScoringMatrix['T' - 'A']['B' - 'A'] = mMatchScore;
-}
-
-// enables homo-polymer scoring
-void CBandedSmithWaterman::EnableHomoPolymerGapPenalty(float hpGapOpenPenalty) {
-	mUseHomoPolymerGapOpenPenalty = true;
-	mHomoPolymerGapOpenPenalty    = hpGapOpenPenalty;
-}
-
-// reinitializes the matrices
-void CBandedSmithWaterman::ReinitializeMatrices(const PositionType& positionType, const unsigned int& s1Length, const unsigned int& s2Length, const pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr) {
-
-/*
-	try {
-		mBestScores	 = new float[mBandwidth + 2];
-		mAnchorGapScores = new float[mBandwidth + 2];
-	} catch(bad_alloc) {
-		printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
-		exit(1);
-	}
-*/
-
-	const unsigned int numColumns = mBandwidth + 2;
-	unsigned int numRows = 0;
-
-	switch(positionType) {
-		case Position_REF_AND_QUERY_ZERO:
-			numRows = s2Length + 1;
-			break;
-		case Position_REF_ZERO:
-			numRows = s2Length - hr.second.first + 2;
-			break;
-		case Position_QUERY_ZERO:
-			numRows = min(s2Length + 1, s1Length - hr.first.first + 2);
-			break;
-		case Position_REF_AND_QUERO_NONZERO:
-			numRows = min(s1Length - hr.first.first + 2, s2Length - hr.second.first + 2);
-			break;
-	}
-
-	// update the size of the backtrace matrix
-	if((numColumns * numRows) > mCurrentMatrixSize) {
-
-		mCurrentMatrixSize = numColumns * numRows;
-		if(mPointers) delete [] mPointers;
-
-		try {
-			mPointers = new ElementInfo[mCurrentMatrixSize];
-		} catch(bad_alloc) {
-			printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
-			exit(1);
-		}
-	}
-
-	// initialize our backtrace matrix
-	ElementInfo defaultElement;
-	defaultElement.Direction = Directions_STOP;
-	defaultElement.mSizeOfHorizontalGaps = 1;
-	defaultElement.mSizeOfVerticalGaps   = 1;
-
-	uninitialized_fill(mPointers, mPointers + mCurrentMatrixSize, defaultElement);
-
-	// update the sequence character arrays
-	if((s1Length + s2Length) > mCurrentAQSumSize) {
-
-		mCurrentAQSumSize = s1Length + s2Length;
-		if(mReversedAnchor)	delete [] mReversedAnchor;
-		if(mReversedQuery)	delete [] mReversedQuery;
-
-		try {
-			mReversedAnchor	= new char[mCurrentAQSumSize + 1]; // reversed sequence #1
-			mReversedQuery	= new char[mCurrentAQSumSize + 1]; // reversed sequence #2
-		} catch(bad_alloc) {
-			printf("ERROR: Unable to allocate enough memory for the banded Smith-Waterman algorithm.\n");
-			exit(1);
-		}
-	}
-
-	// initialize the gap score and score vectors
-	uninitialized_fill(mAnchorGapScores, mAnchorGapScores + mBandwidth + 2, FLOAT_NEGATIVE_INFINITY);
-	memset((char*)mBestScores, 0, SIZEOF_FLOAT * (mBandwidth + 2));
-	mBestScores[0]              = FLOAT_NEGATIVE_INFINITY;
-	mBestScores[mBandwidth + 1] = FLOAT_NEGATIVE_INFINITY;
-}
-
-// performs the backtrace algorithm
-void CBandedSmithWaterman::Traceback(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2, unsigned int bestRow, unsigned int bestColumn, const unsigned int rowOffset, const unsigned int columnOffset){
-
-
-	unsigned int currentRow		 = bestRow;
-	unsigned int currentColumn	 = bestColumn;
-	unsigned int currentPosition = ((currentRow + rowOffset) * (mBandwidth + 2)) + (columnOffset - currentRow + currentColumn);
-
-
-	// record the numbers of row and column before the current row and column
-	unsigned int previousRow	= bestRow;
-	unsigned int previousColumn	= bestColumn;
-
-	unsigned int gappedAnchorLen = 0;
-	unsigned int gappedQueryLen  = 0;
-	unsigned int numMismatches   = 0;
-
-	bool keepProcessing = true;
-	while(keepProcessing) {
-		unsigned int nVerticalGap = 0;
-		unsigned int nHorizontalGap = 0;
-		switch(mPointers[currentPosition].Direction){
-			case Directions_DIAGONAL:
-				nVerticalGap = mPointers[currentPosition].mSizeOfVerticalGaps;
-				for(unsigned int i = 0; i < nVerticalGap; i++){
-					mReversedAnchor[gappedAnchorLen++] = GAP;
-					mReversedQuery[gappedQueryLen++]   = s2[currentRow];
-
-					numMismatches++;
-
-					previousRow = currentRow;
-					previousColumn = currentColumn;
-
-					currentRow--;
-				}
-				break;
-
-			case Directions_STOP:
-				keepProcessing = false;
-				//mReversedAnchor[gappedAnchorLen+1]='\0';
-				//mReversedQuery [gappedQueryLen+1]='\0';
-				break;
-
-			case Directions_UP:
-
-				mReversedAnchor[gappedAnchorLen++] = s1[currentColumn];
-				mReversedQuery[gappedQueryLen++]   = s2[currentRow];
-
-				if(s1[currentColumn] != s2[currentRow]) numMismatches++;
-				previousRow = currentRow;
-				previousColumn = currentColumn;
-
-				currentRow--;
-				currentColumn--;
-				break;
-
-			case Directions_LEFT:
-				nHorizontalGap =  mPointers[currentPosition].mSizeOfHorizontalGaps;
-				for(unsigned int i = 0; i < nHorizontalGap; i++){
-
-					mReversedAnchor[gappedAnchorLen++] = s1[currentColumn];
-					mReversedQuery[gappedQueryLen++]   = GAP;
-
-					numMismatches++;
-
-					previousRow = currentRow;
-					previousColumn = currentColumn;
-
-
-					currentColumn--;
-				}
-				break;
-		}
-		currentPosition = ((currentRow + rowOffset) * (mBandwidth + 2)) + (columnOffset - currentRow + currentColumn);
-	}
-
-	// correct the reference and query sequence order
-	mReversedAnchor[gappedAnchorLen] = 0;
-	mReversedQuery [gappedQueryLen] = 0;
-	reverse(mReversedAnchor, mReversedAnchor + gappedAnchorLen);
-	reverse(mReversedQuery,  mReversedQuery  + gappedQueryLen);
-
-	//alignment.Reference = mReversedAnchor;
-	//alignment.Query     = mReversedQuery;
-
-	// assign the alignment endpoints
-	//alignment.ReferenceBegin = previousColumn;
-	//alignment.ReferenceEnd   = bestColumn;
-	referenceAl  = previousColumn;
-	/*  
-	if(alignment.IsReverseComplement){
-		alignment.QueryBegin = s2.length() - bestRow - 1; 
-		alignment.QueryEnd   = s2.length() - previousRow - 1;
-	} else {
-		alignment.QueryBegin = previousRow; 
-		alignment.QueryEnd   = bestRow;
-	}
-	*/
-	
-	//alignment.QueryLength	= alignment.QueryEnd - alignment.QueryBegin + 1;
-	//alignment.NumMismatches = numMismatches;
-
-	const unsigned int alLength = strlen(mReversedAnchor);
-	unsigned int m = 0, d = 0, i = 0;
-	bool dashRegion = false;
-	ostringstream oCigar;
-
-	if ( previousRow != 0 )
-		oCigar << previousRow << 'S';
-
-	for ( unsigned int j = 0; j < alLength; j++ ) {
-		// m
-		if ( ( mReversedAnchor[j] != GAP ) && ( mReversedQuery[j] != GAP ) ) {
-			if ( dashRegion ) {
-				if ( d != 0 ) oCigar << d << 'D';
-				else          oCigar << i << 'I';
-			}
-			dashRegion = false;
-			m++;
-			d = 0;
-			i = 0;
-		}
-		// I or D
-		else {
-			if ( !dashRegion )
-				oCigar << m << 'M';
-			dashRegion = true;
-			m = 0;
-			if ( mReversedAnchor[j] == GAP ) {
-				if ( d != 0 ) oCigar << d << 'D';
-				i++;
-				d = 0;
-			}
-			else {
-				if ( i != 0 ) oCigar << i << 'I';
-				d++;
-				i = 0;
-			}
-		}
-	}
-	
-	if      ( m != 0 ) oCigar << m << 'M';
-	else if ( d != 0 ) oCigar << d << 'D';
-	else if ( i != 0 ) oCigar << i << 'I';
-
-	if ( ( bestRow + 1 ) != s2.length() )
-		oCigar << s2.length() - bestRow - 1 << 'S';
-
-	cigarAl = oCigar.str();
-	
-
-	// correct the homopolymer gap order
-	CorrectHomopolymerGapOrder(alLength, numMismatches);
-
-}


=====================================
external/vcflib/smithwaterman/BandedSmithWaterman.h deleted
=====================================
@@ -1,111 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <algorithm>
-#include <memory>
-//#include "Alignment.h"
-#include "Mosaik.h"
-//#include "HashRegion.h"
-#include <string.h>
-#include <stdio.h>
-#include <sstream>
-#include <string>
-
-using namespace std;
-
-#define MOSAIK_NUM_NUCLEOTIDES 26
-#define GAP '-'
-
-typedef unsigned char DirectionType;
-typedef unsigned char PositionType;
-
-struct ElementInfo {
-	unsigned int Direction             : 2;
-	unsigned int mSizeOfVerticalGaps   : 15;
-	unsigned int mSizeOfHorizontalGaps : 15;
-};
-
-class CBandedSmithWaterman {
-public:
-	// constructor
-	CBandedSmithWaterman(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty, unsigned int bandWidth);
-	// destructor
-	~CBandedSmithWaterman(void);
-	// aligns the query sequence to the anchor using the Smith Waterman Gotoh algorithm
-	void Align(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr);
-	// enables homo-polymer scoring
-	void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
-private:
-	// calculates the score during the forward algorithm
-	float CalculateScore(const string& s1, const string& s2, const unsigned int rowNum, const unsigned int columnNum, float& currentQueryGapScore, const unsigned int rowOffset, const unsigned int columnOffset);
-	// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
-	void CreateScoringMatrix(void);
-	// corrects the homopolymer gap order for forward alignments
-	void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
-	// returns the maximum floating point number
-	static inline float MaxFloats(const float& a, const float& b, const float& c);
-	// reinitializes the matrices
-	void ReinitializeMatrices(const PositionType& positionType, const unsigned int& s1Length, const unsigned int& s2Length, const pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr);
-	// performs the backtrace algorithm
-	void Traceback(unsigned int& referenceAl, string& stringAl, const string& s1, const string& s2, unsigned int bestRow, unsigned int bestColumn, const unsigned int rowOffset, const unsigned int columnOffset);
-	// updates the best score during the forward algorithm
-	inline void UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score);
-	// our simple scoring matrix
-	float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
-	// keep track of maximum initialized sizes
-	unsigned int mCurrentMatrixSize;
-	unsigned int mCurrentAnchorSize;
-	unsigned int mCurrentAQSumSize;
-	unsigned int mBandwidth;
-	// define our backtrace directions
-	const static DirectionType Directions_STOP;
-	const static DirectionType Directions_LEFT;
-	const static DirectionType Directions_DIAGONAL;
-	const static DirectionType Directions_UP;
-	// store the backtrace pointers
-	ElementInfo* mPointers;
-	// define our position types
-	const static PositionType Position_REF_AND_QUERY_ZERO;
-	const static PositionType Position_REF_ZERO;
-	const static PositionType Position_QUERY_ZERO;
-	const static PositionType Position_REF_AND_QUERO_NONZERO;
-	// define scoring constants
-	const float mMatchScore;
-	const float mMismatchScore;
-	const float mGapOpenPenalty;
-	const float mGapExtendPenalty;
-	// score if xi aligns to a gap after yi
-	float* mAnchorGapScores;
-	// best score of alignment x1...xi to y1...yi
-	float* mBestScores;
-	// our reversed alignment
-	char* mReversedAnchor;
-	char* mReversedQuery;
-	// define static constants
-	static const float FLOAT_NEGATIVE_INFINITY;
-	// toggles the use of the homo-polymer gap open penalty
-	bool mUseHomoPolymerGapOpenPenalty;
-	float mHomoPolymerGapOpenPenalty;
-};
-
-// returns the maximum floating point number
-inline float CBandedSmithWaterman::MaxFloats(const float& a, const float& b, const float& c) {
-	float max = 0.0f;
-	if(a > max) max = a;
-	if(b > max) max = b;
-	if(c > max) max = c;
-	return max;
-}
-
-// updates the best score during the forward algorithm
-inline void CBandedSmithWaterman::UpdateBestScore(unsigned int& bestRow, unsigned int& bestColumn, float& bestScore, const unsigned int rowNum, const unsigned int columnNum, const float score) {
-
-	//const unsigned int row    = rowNum + rowOffset;
-	//const unsigned int column = columnOffset - rowNum + columnNum;
-
-	if(score > bestScore) {
-		bestRow    = rowNum;
-		bestColumn = columnNum;
-		bestScore  = score;
-	}
-}


=====================================
external/vcflib/smithwaterman/IndelAllele.cpp deleted
=====================================
@@ -1,62 +0,0 @@
-#include "IndelAllele.h"
-
-using namespace std;
-
-
-bool IndelAllele::homopolymer(void) {
-    string::iterator s = sequence.begin();
-    char c = *s++;
-    while (s != sequence.end()) {
-        if (c != *s++) return false;
-    }
-    return true;
-}
-
-int IndelAllele::readLength(void) {
-    if (insertion) {
-	return length;
-    } else {
-	return 0;
-    }
-}
-
-int IndelAllele::referenceLength(void) {
-    if (insertion) {
-	return 0;
-    } else {
-	return length;
-    }
-}
-
-bool homopolymer(string sequence) {
-    string::iterator s = sequence.begin();
-    char c = *s++;
-    while (s != sequence.end()) {
-        if (c != *s++) return false;
-    }
-    return true;
-}
-
-ostream& operator<<(ostream& out, const IndelAllele& indel) {
-    string t = indel.insertion ? "i" : "d";
-    out << t <<  ":" << indel.position << ":" << indel.readPosition << ":" << indel.length << ":" << indel.sequence;
-    return out;
-}
-
-bool operator==(const IndelAllele& a, const IndelAllele& b) {
-    return (a.insertion == b.insertion
-            && a.length == b.length
-            && a.position == b.position
-            && a.sequence == b.sequence);
-}
-
-bool operator!=(const IndelAllele& a, const IndelAllele& b) {
-    return !(a==b);
-}
-
-bool operator<(const IndelAllele& a, const IndelAllele& b) {
-    ostringstream as, bs;
-    as << a;
-    bs << b;
-    return as.str() < bs.str();
-}


=====================================
external/vcflib/smithwaterman/IndelAllele.h deleted
=====================================
@@ -1,37 +0,0 @@
-#ifndef __INDEL_ALLELE_H
-#define __INDEL_ALLELE_H
-
-#include <string>
-#include <iostream>
-#include <sstream>
-
-using namespace std;
-
-class IndelAllele {
-    friend ostream& operator<<(ostream&, const IndelAllele&);
-    friend bool operator==(const IndelAllele&, const IndelAllele&);
-    friend bool operator!=(const IndelAllele&, const IndelAllele&);
-    friend bool operator<(const IndelAllele&, const IndelAllele&);
-public:
-    bool insertion;
-    int length;
-    int referenceLength(void);
-    int readLength(void);
-    int position;
-    int readPosition;
-    string sequence;
-
-    bool homopolymer(void);
-
-    IndelAllele(bool i, int l, int p, int rp, string s)
-        : insertion(i), length(l), position(p), readPosition(rp), sequence(s)
-    { }
-};
-
-bool homopolymer(string sequence);
-ostream& operator<<(ostream& out, const IndelAllele& indel);
-bool operator==(const IndelAllele& a, const IndelAllele& b);
-bool operator!=(const IndelAllele& a, const IndelAllele& b);
-bool operator<(const IndelAllele& a, const IndelAllele& b);
-
-#endif


=====================================
external/vcflib/smithwaterman/LeftAlign.cpp deleted
=====================================
@@ -1,853 +0,0 @@
-#include "LeftAlign.h"
-
-//bool debug;
-#define VERBOSE_DEBUG
-
-// Attempts to left-realign all the indels represented by the alignment cigar.
-//
-// This is done by shifting all indels as far left as they can go without
-// mismatch, then merging neighboring indels of the same class.  leftAlign
-// updates the alignment cigar with changes, and returns true if realignment
-// changed the alignment cigar.
-//
-// To left-align, we move multi-base indels left by their own length as long as
-// the preceding bases match the inserted or deleted sequence.  After this
-// step, we handle multi-base homopolymer indels by shifting them one base to
-// the left until they mismatch the reference.
-//
-// To merge neighboring indels, we iterate through the set of left-stabilized
-// indels.  For each indel we add a new cigar element to the new cigar.  If a
-// deletion follows a deletion, or an insertion occurs at the same place as
-// another insertion, we merge the events by extending the previous cigar
-// element.
-//
-// In practice, we must call this function until the alignment is stabilized.
-//
-bool leftAlign(string& querySequence, string& cigar, string& baseReferenceSequence, int& offset, bool debug) {
-
-    debug = false;
-
-    string referenceSequence = baseReferenceSequence.substr(offset);
-
-    int arsOffset = 0; // pointer to insertion point in aligned reference sequence
-    string alignedReferenceSequence, alignedQuerySequence;
-    if (debug) alignedReferenceSequence = referenceSequence;
-    if (debug) alignedQuerySequence = querySequence;
-    int aabOffset = 0;
-
-    // store information about the indels
-    vector<IndelAllele> indels;
-
-    int rp = 0;  // read position, 0-based relative to read
-    int sp = 0;  // sequence position
-
-    string softBegin;
-    string softEnd;
-
-    string cigarbefore = cigar;
-
-    vector<pair<int, string> > cigarData = splitCIGAR(cigar);
-    for (vector<pair<int, string> >::const_iterator c = cigarData.begin();
-        c != cigarData.end(); ++c) {
-        unsigned int l = c->first;
-        string t = c->second;
-	if (debug) cerr << l << t << " " << sp << " " << rp << endl;
-        if (t == "M") { // match or mismatch
-            sp += l;
-            rp += l;
-        } else if (t == "D") { // deletion
-            indels.push_back(IndelAllele(false, l, sp, rp, referenceSequence.substr(sp, l)));
-            if (debug) { cerr << indels.back() << endl;  alignedQuerySequence.insert(rp + aabOffset, string(l, '-')); }
-            aabOffset += l;
-            sp += l;  // update reference sequence position
-        } else if (t == "I") { // insertion
-            indels.push_back(IndelAllele(true, l, sp, rp, querySequence.substr(rp, l)));
-            if (debug) { cerr << indels.back() << endl; alignedReferenceSequence.insert(sp + softBegin.size() + arsOffset, string(l, '-')); }
-            arsOffset += l;
-            rp += l;
-        } else if (t == "S") { // soft clip, clipped sequence present in the read not matching the reference
-            // remove these bases from the refseq and read seq, but don't modify the alignment sequence
-            if (rp == 0) {
-                alignedReferenceSequence = string(l, '*') + alignedReferenceSequence;
-		//indels.push_back(IndelAllele(true, l, sp, rp, querySequence.substr(rp, l)));
-                softBegin = querySequence.substr(0, l);
-            } else {
-                alignedReferenceSequence = alignedReferenceSequence + string(l, '*');
-		//indels.push_back(IndelAllele(true, l, sp, rp, querySequence.substr(rp, l)));
-                softEnd = querySequence.substr(querySequence.size() - l, l);
-            }
-            rp += l;
-        } else if (t == "H") { // hard clip on the read, clipped sequence is not present in the read
-        } else if (t == "N") { // skipped region in the reference not present in read, aka splice
-            sp += l;
-        }
-    }
-
-
-    if (debug) cerr << "| " << cigarbefore << endl
-		    << "| " << alignedReferenceSequence << endl
-		    << "| " << alignedQuerySequence << endl;
-
-    // if no indels, return the alignment
-    if (indels.empty()) { return false; }
-
-    if (debug) {
-	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
-	cerr << endl;
-    }
-
-    // for each indel, from left to right
-    //     while the indel sequence repeated to the left and we're not matched up with the left-previous indel
-    //         move the indel left
-
-    vector<IndelAllele>::iterator previous = indels.begin();
-    for (vector<IndelAllele>::iterator id = indels.begin(); id != indels.end(); ++id) {
-
-        // left shift by repeats
-        //
-        // from 1 base to the length of the indel, attempt to shift left
-        // if the move would cause no change in alignment optimality (no
-        // introduction of mismatches, and by definition no change in gap
-        // length), move to the new position.
-        // in practice this moves the indel left when we reach the size of
-        // the repeat unit.
-        //
-        int steppos, readsteppos;
-        IndelAllele& indel = *id;
-        int i = 1;
-
-        while (i <= indel.length) {
-
-            int steppos = indel.position - i;
-            int readsteppos = indel.readPosition - i;
-
-            if (debug) {
-                if (steppos >= 0 && readsteppos >= 0) {
-                    cerr << "refseq flank " << referenceSequence.substr(steppos, indel.length) << endl;
-                    cerr << "qryseq flank " << querySequence.substr(readsteppos, indel.length) << endl;
-                    cerr << "indelseq     " << indel.sequence << endl;
-                }
-            }
-
-            while (steppos >= 0 && readsteppos >= 0
-                   && indel.sequence == referenceSequence.substr(steppos, indel.length)
-                   && indel.sequence == querySequence.substr(readsteppos, indel.length)
-                   && (id == indels.begin()
-                       || (previous->insertion && steppos >= previous->position)
-                       || (!previous->insertion && steppos >= previous->position + previous->length))) {
-                LEFTALIGN_DEBUG((indel.insertion ? "insertion " : "deletion ") << indel << " shifting " << i << "bp left" << endl);
-                indel.position -= i;
-                indel.readPosition -= i;
-                steppos = indel.position - i;
-                readsteppos = indel.readPosition - i;
-            }
-            do {
-                ++i;
-            } while (i <= indel.length && indel.length % i != 0);
-        }
-
-
-
-        // left shift indels with exchangeable flanking sequence
-        //
-        // for example:
-        //
-        //    GTTACGTT           GTTACGTT
-        //    GT-----T   ---->   G-----TT
-        //
-        // GTGTGACGTGT           GTGTGACGTGT
-        // GTGTG-----T   ---->   GTG-----TGT
-        //
-        // GTGTG-----T           GTG-----TGT
-        // GTGTGACGTGT   ---->   GTGTGACGTGT
-        //
-        //
-
-        steppos = indel.position - 1;
-        readsteppos = indel.readPosition - 1;
-        while (steppos >= 0 && readsteppos >= 0
-               && querySequence.at(readsteppos) == referenceSequence.at(steppos)
-	       && (int)referenceSequence.size() > steppos + indel.length
-	       && indel.sequence.at((int) indel.sequence.size() - 1) == referenceSequence.at(steppos + indel.length) // are the exchanged bases going to match wrt. the reference?
-               && querySequence.at(readsteppos) == indel.sequence.at((int) indel.sequence.size() - 1)
-               && (id == indels.begin()
-                   || (previous->insertion && indel.position - 1 >= previous->position)
-                   || (!previous->insertion && indel.position - 1 >= previous->position + previous->length))) {
-            if (debug) cerr << (indel.insertion ? "insertion " : "deletion ") << indel << " exchanging bases " << 1 << "bp left" << endl;
-            indel.sequence = indel.sequence.at(indel.sequence.size() - 1) + indel.sequence.substr(0, indel.sequence.size() - 1);
-            indel.position -= 1;
-            indel.readPosition -= 1;
-	    if (debug) cerr << indel << endl;
-            steppos = indel.position - 1;
-            readsteppos = indel.readPosition - 1;
-	    //if (debug && steppos && readsteppos) cerr << querySequence.at(readsteppos) << " ==? " << referenceSequence.at(steppos) << endl;
-	    //if (debug && steppos && readsteppos) cerr << indel.sequence.at((int) indel.sequence.size() - 1) << " ==? " << referenceSequence.at(steppos + indel.length) << endl;
-        }
-        // tracks previous indel, so we don't run into it with the next shift
-        previous = id;
-    }
-
-    if (debug) {
-	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
-	cerr << endl;
-    }
-
-    if (debug) cerr << "bring together floating indels" << endl;
-
-    // bring together floating indels
-    // from left to right
-    // check if we could merge with the next indel
-    // if so, adjust so that we will merge in the next step
-    if (indels.size() > 1) {
-        previous = indels.begin();
-        for (vector<IndelAllele>::iterator id = (indels.begin() + 1); id != indels.end(); ++id) {
-            IndelAllele& indel = *id;
-            // parsimony: could we shift right and merge with the previous indel?
-            // if so, do it
-            int prev_end_ref = previous->insertion ? previous->position : previous->position + previous->length;
-            int prev_end_read = !previous->insertion ? previous->readPosition : previous->readPosition + previous->length;
-            if (previous->insertion == indel.insertion
-                    && ((previous->insertion
-                        && (previous->position < indel.position
-                        && previous->readPosition < indel.readPosition))
-                        ||
-                        (!previous->insertion
-                        && (previous->position + previous->length < indel.position)
-                        && (previous->readPosition < indel.readPosition)
-                        ))) {
-                if (previous->homopolymer()) {
-                    string seq = referenceSequence.substr(prev_end_ref, indel.position - prev_end_ref);
-                    string readseq = querySequence.substr(prev_end_read, indel.position - prev_end_ref);
-                    if (debug) cerr << "seq: " << seq << endl << "readseq: " << readseq << endl;
-                    if (previous->sequence.at(0) == seq.at(0)
-                            && homopolymer(seq)
-                            && homopolymer(readseq)) {
-                        if (debug) cerr << "moving " << *previous << " right to " 
-					<< (indel.insertion ? indel.position : indel.position - previous->length) << endl;
-                        previous->position = indel.insertion ? indel.position : indel.position - previous->length;
-			previous->readPosition = !indel.insertion ? indel.readPosition : indel.readPosition - previous->readLength(); // should this be readLength?
-                    }
-                }
-		/*
-                else {
-                    int pos = previous->position;
-		    int readpos = previous->readPosition;
-                    while (pos < (int) referenceSequence.length() &&
-                            ((previous->insertion && pos + previous->length <= indel.position)
-                            ||
-                            (!previous->insertion && pos + previous->length < indel.position))
-			   && previous->sequence == referenceSequence.substr(pos + previous->length, previous->length)
-			   && previous->sequence == querySequence.substr(readpos + previous->length, previous->length)
-			) {
-                        pos += previous->length;
-			readpos += previous->length;
-                    }
-		    string seq = previous->sequence;
-                    if (pos > previous->position) {
-			// wobble bases right to left as far as we can go
-			int steppos = previous->position + seq.size();
-			int readsteppos = previous->readPosition + seq.size();
-
-			while (querySequence.at(readsteppos) == referenceSequence.at(steppos)
-			       && querySequence.at(readsteppos) == seq.at(0)
-			       && (id == indels.begin()
-				   || (indel.insertion && pos + seq.size() - 1 <= indel.position)
-				   || (!previous->insertion && indel.position - 1 >= pos + previous->length))) {
-			    seq = seq.substr(1) + seq.at(0);
-			    ++pos;
-			    ++readpos;
-			    steppos = pos + 1;
-			    readsteppos = readpos + 1;
-			}
-
-			if (((previous->insertion && pos + previous->length == indel.position)
-			     ||
-			     (!previous->insertion && pos == indel.position - previous->length))
-			    ) {
-			    if (debug) cerr << "right-merging tandem repeat: moving " << *previous << " right to " << pos << endl;
-			    previous->position = pos;
-			    previous->readPosition = readpos;
-			    previous->sequence = seq;
-			}
-                    }
-                }
-		*/
-            }
-            previous = id;
-        }
-    }
-
-    if (debug) {
-	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
-	cerr << endl;
-    }
-
-
-    if (debug) cerr << "bring in indels at ends of read" << endl;
-
-    // try to "bring in" repeat indels at the end, for maximum parsimony
-    //
-    // e.g.
-    //
-    // AGAAAGAAAGAAAAAGAAAAAGAACCAAGAAGAAAA
-    // AGAAAG------AAAGAAAAAGAACCAAGAAGAAAA
-    //
-    //     has no information which distinguishes it from:
-    //
-    // AGAAAGAAAAAGAAAAAGAACCAAGAAGAAAA
-    // AGAAAG--AAAGAAAAAGAACCAAGAAGAAAA
-    //
-    // here we take the parsimonious explanation
-
-    if (!indels.empty()) {
-	// deal with the first indel
-	// the first deletion ... or the biggest deletion
-	vector<IndelAllele>::iterator a = indels.begin();
-	vector<IndelAllele>::iterator del = indels.begin();
-	for (; a != indels.end(); ++a) {
-	    //if (!a->insertion && a->length > biggestDel->length) biggestDel = a;
-	    if (!a->insertion && a->length) del = a;
-	if (!del->insertion) {
-	    //if (!indel.insertion) { // only handle deletions like this for now
-	    //if (!indel.insertion && !(indels.size() > 1 && indel.readPosition == indels.at(1).readPosition)) { // only handle deletions like this for now
-	    int insertedBpBefore = 0;
-	    int deletedBpBefore = 0;
-	    for (vector<IndelAllele>::iterator i = indels.begin(); i != del; ++i) {
-		if (i->insertion) insertedBpBefore += i->length;
-		else deletedBpBefore += i->length;
-	    }
-	    IndelAllele& indel = *del;
-	    int minsize = indel.length;
-	    int flankingLength = indel.readPosition;
-	    if (debug) cerr << indel << endl;
-	    string flanking = querySequence.substr(0, flankingLength);
-	    if (debug) cerr << flanking << endl;
-
-	    size_t p = referenceSequence.substr(0, indel.position + indel.length).rfind(flanking);
-	    if (p == string::npos) {
-		if (debug) cerr << "flanking not found" << endl;
-	    } else {
-		if (debug) {
-		    cerr << "flanking is at " << p << endl;
-		    cerr << "minsize would be " << (indel.position + indel.length) - ((int) p + flankingLength) << endl;
-		}
-		minsize = (indel.position + indel.length) - ((int) p + flankingLength);
-	    }
-
-	    if (debug) cerr << minsize << endl;
-
-	    if (minsize >= 0 && minsize < indel.length) {
-
-		int softdiff = softBegin.size();
-		if (!softBegin.empty()) { // remove soft clips if we can
-		    if (flankingLength < (int)softBegin.size()) {
-			softBegin = softBegin.substr(0, flankingLength - softBegin.size());
-			softdiff -= softBegin.size();
-		    } else {
-			softBegin.clear();
-		    }
-		}
-
-		// the new read position == the current read position
-		// the new reference position == the flanking length size
-		// the positional offset of the reference sequence == the new position of the deletion - the flanking length
-
-		int diff = indel.length - minsize - softdiff  + deletedBpBefore - insertedBpBefore;
-		//int querydiff = indel.length - minsize - softBegin.size() - insertedBpBefore + deletedBpBefore;
-		if (debug) cerr << "adjusting " << indel.length <<" " << minsize << "  " << softdiff << " " << diff << endl;
-		offset += diff;
-		///
-		indel.length = minsize;
-		indel.sequence = indel.sequence.substr(indel.sequence.size() - minsize);
-		indel.position = flankingLength;
-		indel.readPosition = indel.position; // if we have removed all the sequence before, this should be ==
-		referenceSequence = referenceSequence.substr(diff);
-
-		for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
-		    if (i < del) {
-			i->length = 0; // remove
-		    } else if (i > del) {
-			i->position -= diff;
-		    }
-		}
-	    }
-	    if (debug) cerr << indel << endl;
-
-	    // now, do the same for the reverse
-	    if (indel.length > 0) {
-		int minsize = indel.length + 1;
-		int flankingLength = querySequence.size() - indel.readPosition + indel.readLength();
-		string flanking = querySequence.substr(indel.readPosition + indel.readLength(), flankingLength);
-		//int indelRefEnd = indel.position + indel.referenceLength();
-
-		size_t p = referenceSequence.find(flanking, indel.position);
-		if (p == string::npos) {
-		    if (debug)
-			cerr << "flanking not found" << endl;
-		} else {
-		    if (debug) {
-			cerr << "flanking is at " << p << endl;
-			cerr << "minsize would be " << (int) p - indel.position << endl;
-		    }
-		    minsize = (int) p - indel.position;
-		}
-
-		if (debug) cerr << "minsize " << minsize << endl;
-		if (minsize >= 0 && minsize <= indel.length) {
-		    //referenceSequence = referenceSequence.substr(0, referenceSequence.size() - (indel.length - minsize));
-		    if (debug) cerr << "adjusting " << indel << endl;
-		    indel.length = minsize;
-		    indel.sequence = indel.sequence.substr(0, minsize);
-		    //cerr << indel << endl;
-		    if (!softEnd.empty()) { // remove soft clips if we can
-			if (flankingLength < (int)softEnd.size()) {
-			    softEnd = softEnd.substr(flankingLength - softEnd.size());
-			} else {
-			    softEnd.clear();
-			}
-		    }
-		    for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
-			if (i > del) {
-			    i->length = 0; // remove
-			}
-		    }
-		}
-	    }
-	}
-	}
-    }
-
-    if (debug) {
-	for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
-	cerr << endl;
-    }
-
-    if (debug) cerr << "parsing indels" << endl;
-
-    // if soft clipping can be reduced by adjusting the tailing indels in the read, do it
-    // TODO
-
-    /*
-    int numEmptyIndels = 0;
-
-    if (!indels.empty()) {
-	vector<IndelAllele>::iterator a = indels.begin();
-	while (a != indels.end()) {
-	    if (debug) cerr << "parsing " << *a << endl;
-	    if (!(a->length > 0 && a->position >= 0)) {
-		++numEmptyIndels;
-	    }
-	    ++a;
-	}
-    }
-    */
-
-    for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
-	if (i->length == 0) continue;
-	if (i->insertion) {
-	    if (querySequence.substr(i->readPosition, i->readLength()) != i->sequence) {
-		cerr << "failure: " << *i << " should be " << querySequence.substr(i->readPosition, i->readLength()) << endl;
-		cerr << baseReferenceSequence << endl;
-		cerr << querySequence << endl;
-		throw 1;
-	    }
-	} else {
-	    if (referenceSequence.substr(i->position, i->length) != i->sequence) {
-		cerr << "failure: " << *i << " should be " << referenceSequence.substr(i->position, i->length) << endl;
-		cerr << baseReferenceSequence << endl;
-		cerr << querySequence << endl;
-		throw 1;
-	    }
-	}
-    }
-
-    if (indels.size() > 1) {
-        vector<IndelAllele>::iterator id = indels.begin();
-	while ((id + 1) != indels.end()) {
-	    if (debug) {
-		cerr << "indels: ";
-		for (vector<IndelAllele>::iterator a = indels.begin(); a != indels.end(); ++a) cerr << *a << " ";
-		cerr << endl;
-		//for (vector<IndelAllele>::iterator a = newIndels.begin(); a != newIndels.end(); ++a) cerr << *a << " ";
-		//cerr << endl;
-	    }
-
-	    // get the indels to try to merge
-	    while (id->length == 0 && (id + 1) != indels.end()) ++id;
-	    vector<IndelAllele>::iterator idn = (id + 1);
-	    while (idn != indels.end() && idn->length == 0) ++idn;
-	    if (idn == indels.end()) break;
-
-            IndelAllele& indel = *idn;
-	    IndelAllele& last = *id;
-	    if (debug) cerr << "trying " << last << " against " << indel << endl;
-
-	    int lastend = last.insertion ? last.position : (last.position + last.length);
-	    if (indel.position == lastend) {
-		if (debug) cerr << "indel.position " << indel.position << " lastend " << lastend << endl;
-		if (indel.insertion == last.insertion) {
-		    last.length += indel.length;
-		    last.sequence += indel.sequence;
-		    indel.length = 0;
-		    indel.sequence.clear();
-		    id = idn;
-		} else if (last.length && indel.length) { // if the end of the previous == the start of the current, cut it off of both the ins and the del
-		    if (debug) cerr << "Merging " << last << " " << indel << endl;
-		    int matchsize = 1;
-		    int biggestmatchsize = 0;
-
-		    while (matchsize <= (int)last.sequence.size() && matchsize <= (int)indel.sequence.size()) {
-			if (last.sequence.substr(last.sequence.size() - matchsize) == indel.sequence.substr(0, matchsize)) {
-			    biggestmatchsize = matchsize;
-			}
-			++matchsize;
-		    }
-		    if (debug) cerr << "biggestmatchsize " << biggestmatchsize << endl;
-
-		    last.sequence = last.sequence.substr(0, last.sequence.size() - biggestmatchsize);
-		    last.length -= biggestmatchsize;
-		    indel.sequence = indel.sequence.substr(biggestmatchsize);
-		    indel.length -= biggestmatchsize;
-		    if (indel.insertion) indel.readPosition += biggestmatchsize;
-		    else indel.position += biggestmatchsize;
-
-		    if (indel.length > 0) {
-			id = idn;
-		    }
-		}
-	    } else {
-		if (last.insertion != indel.insertion) {
-		    if (debug) cerr << "merging by overlap " << last << " " << indel << endl;
-		    // see if we can slide the sequence in between these two indels together
-		    string lastOverlapSeq;
-		    string indelOverlapSeq;
-
-		    if (last.insertion) {
-			lastOverlapSeq =
-			    last.sequence
-			    + querySequence.substr(last.readPosition + last.readLength(),
-						   indel.readPosition - (last.readPosition + last.readLength()));
-			indelOverlapSeq =
-			    referenceSequence.substr(last.position + last.referenceLength(),
-						     indel.position - (last.position + last.referenceLength()))
-			    + indel.sequence;
-		    } else {
-			lastOverlapSeq =
-			    last.sequence
-			    + referenceSequence.substr(last.position + last.referenceLength(),
-						       indel.position - (last.position + last.referenceLength()));
-			indelOverlapSeq =
-			    querySequence.substr(last.readPosition + last.readLength(),
-						 indel.readPosition - (last.readPosition + last.readLength()))
-			    + indel.sequence;
-		    }
-
-		    if (debug) {
-			if (!last.insertion) {
-			    if (last.insertion) cerr << string(last.length, '-');
-			    cerr << lastOverlapSeq;
-			    if (indel.insertion) cerr << string(indel.length, '-');
-			    cerr << endl;
-			    if (!last.insertion) cerr << string(last.length, '-');
-			    cerr << indelOverlapSeq;
-			    if (!indel.insertion) cerr << string(indel.length, '-');
-			    cerr << endl;
-			} else {
-			    if (last.insertion) cerr << string(last.length, '-');
-			    cerr << indelOverlapSeq;
-			    if (indel.insertion) cerr << string(indel.length, '-');
-			    cerr << endl;
-			    if (!last.insertion) cerr << string(last.length, '-');
-			    cerr << lastOverlapSeq;
-			    if (!indel.insertion) cerr << string(indel.length, '-');
-			    cerr << endl;
-			}
-		    }
-
-
-		    int dist = min(last.length, indel.length);
-		    int matchingInBetween = indel.position - (last.position + last.referenceLength());
-		    int previousMatchingInBetween = matchingInBetween;
-		    //int matchingInBetween = indel.position - last.position;
-		    if (debug) cerr << "matchingInBetween " << matchingInBetween << endl;
-		    if (debug) cerr << "dist " << dist << endl;
-		    //int mindist = matchingInBetween - dist;
-		    if (lastOverlapSeq == indelOverlapSeq) {
-			matchingInBetween = lastOverlapSeq.size();
-		    } else {
-			// TODO change to use string::find()
-			for (int i = dist; i > 0; --i) {
-			    if (debug) cerr << "lastoverlap: "
-					    << lastOverlapSeq.substr(lastOverlapSeq.size() - previousMatchingInBetween - i)
-					    << " thisoverlap: "
-					    << indelOverlapSeq.substr(0, i + previousMatchingInBetween) << endl;
-			    if (lastOverlapSeq.substr(lastOverlapSeq.size() - previousMatchingInBetween - i)
-				== indelOverlapSeq.substr(0, i + previousMatchingInBetween)) {
-				matchingInBetween = previousMatchingInBetween + i;
-				break;
-			    }
-			}
-		    }
-		    //cerr << last << " " << indel << endl;
-		    if (matchingInBetween > 0 && matchingInBetween > previousMatchingInBetween) {
-			if (debug) cerr << "matching " << matchingInBetween  << "bp between " << last << " " << indel
-				        << " was matching " << previousMatchingInBetween << endl;
-			int diff = matchingInBetween - previousMatchingInBetween;
-			last.length -= diff;
-			last.sequence = last.sequence.substr(0, last.length);
-			indel.length -= diff;
-			indel.sequence = indel.sequence.substr(diff);
-			if (!indel.insertion) indel.position += diff;
-			else indel.readPosition += diff;
-			if (debug) cerr << last << " " << indel << endl;
-		    }// else if (matchingInBetween == 0 || matchingInBetween == indel.position - last.position) {
-			//if (!newIndels.empty()) newIndels.pop_back();
-		    //} //else { newIndels.push_back(indel); }
-		    id = idn;
-		    //newIndels.push_back(indel);
-		} else {
-		    id = idn;
-		    //newIndels.push_back(indel);
-		}
-	    }
-	}
-    }
-
-    vector<IndelAllele> newIndels;
-    for (vector<IndelAllele>::iterator i = indels.begin(); i != indels.end(); ++i) {
-	if (!i->insertion && i->position == 0) { offset += i->length;
-	} else if (i->length > 0) newIndels.push_back(*i); // remove dels at front
-    }
-
-    // for each indel
-    //     if ( we're matched up to the previous insertion (or deletion) 
-    //          and it's also an insertion or deletion )
-    //         merge the indels
-    //
-    // and simultaneously reconstruct the cigar
-    //
-    // if there are spurious deletions at the start and end of the read, remove them
-    // if there are spurious insertions after soft-clipped bases, make them soft clips
-
-    vector<pair<int, string> > newCigar;
-
-    if (!softBegin.empty()) {
-	newCigar.push_back(make_pair(softBegin.size(), "S"));
-    }
-
-    if (newIndels.empty()) {
-
-	int remainingReadBp = querySequence.size() - softEnd.size() - softBegin.size();
-	newCigar.push_back(make_pair(remainingReadBp, "M"));
-
-	if (!softEnd.empty()) {
-	    newCigar.push_back(make_pair(softEnd.size(), "S"));
-	}
-
-	cigar = joinCIGAR(newCigar);
-
-	// check if we're realigned
-	if (cigar == cigarbefore) {
-	    return false;
-	} else {
-	    return true;
-	}
-    }
-
-    vector<IndelAllele>::iterator id = newIndels.begin();
-    vector<IndelAllele>::iterator last = id++;
-
-    if (last->position > 0) {
-	newCigar.push_back(make_pair(last->position, "M"));
-	newCigar.push_back(make_pair(last->length, (last->insertion ? "I" : "D")));
-    } else if (last->position == 0) { // discard floating indels
-	if (last->insertion) newCigar.push_back(make_pair(last->length, "S"));
-	else  newCigar.push_back(make_pair(last->length, "D"));
-    } else {
-	cerr << "negative indel position " << *last << endl;
-    }
-
-    int lastend = last->insertion ? last->position : (last->position + last->length);
-    LEFTALIGN_DEBUG(*last << ",");
-
-    for (; id != newIndels.end(); ++id) {
-	IndelAllele& indel = *id;
-	if (indel.length == 0) continue; // remove 0-length indels
-	if (debug) cerr << indel << " " << *last << endl;
-	LEFTALIGN_DEBUG(indel << ",");
-	if ((((id + 1) == newIndels.end()
-	    && indel.insertion && indel.position == (int)referenceSequence.size())
-		|| (!indel.insertion && indel.position + indel.length == (int)referenceSequence.size()))
-	    ) {
-	    if (indel.insertion) {
-		if (!newCigar.empty() && newCigar.back().second == "S") {
-		    newCigar.back().first += indel.length;
-		} else {
-		    newCigar.push_back(make_pair(indel.length, "S"));
-		}
-	    }
-	} else if (indel.position < lastend) {
-	    cerr << "impossibility?: indel realigned left of another indel" << endl;
-	    return false;
-	} else if (indel.position == lastend) {
-	    // how?
-	    if (indel.insertion == last->insertion) {
-		pair<int, string>& op = newCigar.back();
-		op.first += indel.length;
-	    } else {
-		newCigar.push_back(make_pair(indel.length, (indel.insertion ? "I" : "D")));
-	    }
-        } else if (indel.position > lastend) {  // also catches differential indels, but with the same position
-	    if (!newCigar.empty() && newCigar.back().second == "M") newCigar.back().first += indel.position - lastend;
-	    else newCigar.push_back(make_pair(indel.position - lastend, "M"));
-            newCigar.push_back(make_pair(indel.length, (indel.insertion ? "I" : "D")));
-        }
-
-	last = id;
-	lastend = last->insertion ? last->position : (last->position + last->length);
-
-	if (debug) {
-	    for (vector<pair<int, string> >::iterator c = newCigar.begin(); c != newCigar.end(); ++c)
-		cerr << c->first << c->second;
-	    cerr << endl;
-	}
-
-    }
-
-    int remainingReadBp = querySequence.size() - (last->readPosition + last->readLength()) - softEnd.size();
-    if (remainingReadBp > 0) {
-	if (debug) cerr << "bp remaining = " << remainingReadBp << endl;
-	if (newCigar.back().second == "M") newCigar.back().first += remainingReadBp;
-	else newCigar.push_back(make_pair(remainingReadBp, "M"));
-    }
-
-    if (newCigar.back().second == "D") newCigar.pop_back(); // remove trailing deletions
-
-    if (!softEnd.empty()) {
-	if (newCigar.back().second == "S") newCigar.back().first += softEnd.size();
-	else newCigar.push_back(make_pair(softEnd.size(), "S"));
-    }
-
-    LEFTALIGN_DEBUG(endl);
-
-    cigar = joinCIGAR(newCigar);
-
-    LEFTALIGN_DEBUG(cigar << endl);
-
-    // check if we're realigned
-    if (cigar == cigarbefore) {
-        return false;
-    } else {
-        return true;
-    }
-
-}
-
-int countMismatches(string& querySequence, string& cigar, string referenceSequence) {
-
-    int mismatches = 0;
-    int sp = 0;
-    int rp = 0;
-    vector<pair<int, string> > cigarData = splitCIGAR(cigar);
-    for (vector<pair<int, string> >::const_iterator c = cigarData.begin();
-        c != cigarData.end(); ++c) {
-        unsigned int l = c->first;
-        string t = c->second;
-        if (t == "M") { // match or mismatch
-            for (int i = 0; i < (int)l; ++i) {
-                if (querySequence.at(rp) != referenceSequence.at(sp))
-                    ++mismatches;
-                ++sp;
-                ++rp;
-            }
-        } else if (t == "D") { // deletion
-            sp += l;  // update reference sequence position
-        } else if (t == "I") { // insertion
-            rp += l;  // update read position
-        } else if (t == "S") { // soft clip, clipped sequence present in the read not matching the reference
-            rp += l;
-        } else if (t == "H") { // hard clip on the read, clipped sequence is not present in the read
-        } else if (t == "N") { // skipped region in the reference not present in read, aka splice
-            sp += l;
-        }
-    }
-
-    return mismatches;
-
-}
-
-// Iteratively left-aligns the indels in the alignment until we have a stable
-// realignment.  Returns true on realignment success or non-realignment.
-// Returns false if we exceed the maximum number of realignment iterations.
-//
-bool stablyLeftAlign(string querySequence, string& cigar, string referenceSequence, int& offset, int maxiterations, bool debug) {
-
-    if (!leftAlign(querySequence, cigar, referenceSequence, offset)) {
-
-        LEFTALIGN_DEBUG("did not realign" << endl);
-        return true;
-
-    } else {
-
-        while (leftAlign(querySequence, cigar, referenceSequence, offset) && --maxiterations > 0) {
-            LEFTALIGN_DEBUG("realigning ..." << endl);
-        }
-
-        if (maxiterations <= 0) {
-            return false;
-        } else {
-            return true;
-        }
-    }
-}
-
-string mergeCIGAR(const string& c1, const string& c2) {
-    vector<pair<int, string> > cigar1 = splitCIGAR(c1);
-    vector<pair<int, string> > cigar2 = splitCIGAR(c2);
-    // check if the middle elements are the same
-    if (cigar1.back().second == cigar2.front().second) {
-        cigar1.back().first += cigar2.front().first;
-        cigar2.erase(cigar2.begin());
-    }
-    for (vector<pair<int, string> >::iterator c = cigar2.begin(); c != cigar2.end(); ++c) {
-        cigar1.push_back(*c);
-    }
-    return joinCIGAR(cigar1);
-}
-
-vector<pair<int, string> > splitCIGAR(const string& cigarStr) {
-    vector<pair<int, string> > cigar;
-    string number;
-    string type;
-    // strings go [Number][Type] ...
-    for (string::const_iterator s = cigarStr.begin(); s != cigarStr.end(); ++s) {
-        char c = *s;
-        if (isdigit(c)) {
-            if (type.empty()) {
-                number += c;
-            } else {
-                // signal for next token, push back the last pair, clean up
-                cigar.push_back(make_pair(atoi(number.c_str()), type));
-                number.clear();
-                type.clear();
-                number += c;
-            }
-        } else {
-            type += c;
-        }
-    }
-    if (!number.empty() && !type.empty()) {
-        cigar.push_back(make_pair(atoi(number.c_str()), type));
-    }
-    return cigar;
-}
-
-string joinCIGAR(const vector<pair<int, string> >& cigar) {
-    string cigarStr;
-    for (vector<pair<int, string> >::const_iterator c = cigar.begin(); c != cigar.end(); ++c) {
-        if (c->first) {
-            cigarStr += convert(c->first) + c->second;
-        }
-    }
-    return cigarStr;
-}


=====================================
external/vcflib/smithwaterman/LeftAlign.h deleted
=====================================
@@ -1,32 +0,0 @@
-#ifndef __LEFTALIGN_H
-#define __LEFTALIGN_H
-
-#include <algorithm>
-#include <map>
-#include <vector>
-#include <list>
-#include <utility>
-#include <sstream>
-
-#include "IndelAllele.h"
-#include "convert.h"
-
-#ifdef VERBOSE_DEBUG
-#define LEFTALIGN_DEBUG(msg) \
-    if (debug) { cerr << msg; }
-#else
-#define LEFTALIGN_DEBUG(msg)
-#endif
-
-using namespace std;
-
-bool leftAlign(string& alternateQuery, string& cigar, string& referenceSequence, int& offset, bool debug = false);
-bool stablyLeftAlign(string alternateQuery, string& cigar, string referenceSequence, int& offset, int maxiterations = 20, bool debug = false);
-int countMismatches(string& alternateQuery, string& cigar, string& referenceSequence);
-
-string mergeCIGAR(const string& c1, const string& c2);
-vector<pair<int, string> > splitCIGAR(const string& cigarStr);
-string joinCIGAR(const vector<pair<int, string> >& cigar);
-
-
-#endif


=====================================
external/vcflib/smithwaterman/Makefile deleted
=====================================
@@ -1,34 +0,0 @@
-# =========================================
-# MOSAIK Banded Smith-Waterman Makefile
-# (c) 2009 Michael Stromberg & Wan-Ping Lee
-# =========================================
-
-# ----------------------------------
-# define our source and object files
-# ----------------------------------
-SOURCES= smithwaterman.cpp BandedSmithWaterman.cpp SmithWatermanGotoh.cpp Repeats.cpp LeftAlign.cpp IndelAllele.cpp
-OBJECTS= $(SOURCES:.cpp=.o)
-
-# ----------------
-# compiler options
-# ----------------
-
-CFLAGS=-Wall -O3
-LDFLAGS=-Wl,-s
-#CFLAGS=-g
-PROGRAM=smithwaterman
-LIBS=
-
-all: $(PROGRAM)
-
-.PHONY: all
-
-$(PROGRAM): $(OBJECTS)
-	@echo "  * linking $(PROGRAM)"
-	@$(CXX) $(LDFLAGS) $(CFLAGS) -o $@ $^ $(LIBS)
-
-.PHONY: clean
-
-clean:
-	@echo "Cleaning up."
-	@rm -f *.o $(PROGRAM) *~


=====================================
external/vcflib/smithwaterman/Mosaik.h deleted
=====================================
@@ -1,73 +0,0 @@
-#pragma once
-
-#ifndef WIN32
-//#include "SafeFunctions.h"
-#endif
-
-// ==============
-// MOSAIK version
-// ==============
-
-#define MOSAIK_VERSION_DATE "2009-02-11"
-
-// adopt a major.minor.build version number [1].[1].[3]
-const unsigned char  MOSAIK_MAJOR_VERSION = 0;
-const unsigned char  MOSAIK_MINOR_VERSION = 9;
-const unsigned short MOSAIK_BUILD_VERSION = 899;
-
-// ================================
-// Platform specific variable sizes
-// ================================
-
-// Windows Vista 32-bit
-// Fedora Core 7 32-bit
-// Fedora Core 6 64-bit
-// Itanium2      64-bit
-#define SIZEOF_CHAR          1
-#define SIZEOF_WCHAR         2
-#define SIZEOF_SHORT         2
-#define SIZEOF_INT           4
-#define SIZEOF_FLOAT         4
-#define SIZEOF_DOUBLE        8
-#define SIZEOF_UINT64        8
-#define MOSAIK_LITTLE_ENDIAN 1
-
-#ifdef WIN32
-typedef signed long long    int64_t;
-typedef unsigned long long uint64_t;
-#endif
-
-#define NEGATIVE_ONE_INT     0xffffffff
-#define NEGATIVE_TWO_INT     0xfffffffe
-#define NEGATIVE_THREE_INT   0xfffffffd
-#define NEGATIVE_FOUR_INT    0xfffffffc
-#define MAX_SHORT            0xffff
-
-// ==========================
-// Platform specific file I/O 
-// ==========================
-
-#ifdef WIN32
-const char OS_DIRECTORY_SEPARATOR = '\\';
-#else
-const char OS_DIRECTORY_SEPARATOR = '/';
-#endif
-
-#define DIRECTORY_NAME_LENGTH    255
-
-// ====================================
-// Enable unit test diagnostic messages
-// ====================================
-
-#ifdef UNITTEST
-#define SILENTMODE if(0)
-#else
-#define SILENTMODE
-#endif
-
-// =================
-// Aligner constants
-// =================
-
-const double HASH_REGION_EXTENSION_PERCENT     = 0.025;
-const unsigned char REFERENCE_SEQUENCE_QUALITY = 40;


=====================================
external/vcflib/smithwaterman/Repeats.cpp deleted
=====================================
@@ -1,69 +0,0 @@
-#include "Repeats.h"
-
-map<string, int> repeatCounts(long int position, const string& sequence, int maxsize) {
-    map<string, int> counts;
-    for (int i = 1; i <= maxsize; ++i) {
-        // subseq here i bases
-        string seq = sequence.substr(position, i);
-        // go left.
-
-        int j = position - i;
-        int leftsteps = 0;
-        while (j >= 0 && seq == sequence.substr(j, i)) {
-            j -= i;
-            ++leftsteps;
-        }
-
-        // go right.
-        j = position;
-
-        int rightsteps = 0;
-        while (j + i <= (int)sequence.size() && seq == sequence.substr(j, i)) {
-            j += i;
-            ++rightsteps;
-        }
-        // if we went left and right a non-zero number of times, 
-        if (leftsteps + rightsteps > 1) {
-            counts[seq] = leftsteps + rightsteps;
-        }
-    }
-
-    // filter out redundant repeat information
-    if (counts.size() > 1) {
-        map<string, int> filteredcounts;
-        map<string, int>::iterator c = counts.begin();
-        string prev = c->first;
-        filteredcounts[prev] = c->second;  // shortest sequence
-        ++c;
-        for (; c != counts.end(); ++c) {
-            int i = 0;
-            string seq = c->first;
-            while (i + prev.length() <= seq.length() && seq.substr(i, prev.length()) == prev) {
-                i += prev.length();
-            }
-            if (i < (int)seq.length()) {
-                filteredcounts[seq] = c->second;
-                prev = seq;
-            }
-        }
-        return filteredcounts;
-    } else {
-        return counts;
-    }
-}
-
-bool isRepeatUnit(const string& seq, const string& unit) {
-
-    if (seq.size() % unit.size() != 0) {
-	return false;
-    } else {
-	int maxrepeats = seq.size() / unit.size();
-	for (int i = 0; i < maxrepeats; ++i) {
-	    if (seq.substr(i * unit.size(), unit.size()) != unit) {
-		return false;
-	    }
-	}
-	return true;
-    }
-
-}


=====================================
external/vcflib/smithwaterman/Repeats.h deleted
=====================================
@@ -1,8 +0,0 @@
-#include <iostream>
-#include <string>
-#include <map>
-
-using namespace std;
-
-map<string, int> repeatCounts(long int pos, const string& seq, int maxsize);
-bool isRepeatUnit(const string& seq, const string& unit);


=====================================
external/vcflib/smithwaterman/SWMain.cpp deleted
=====================================
@@ -1,126 +0,0 @@
-#include <iostream>
-#include <string.h>
-//#include "Alignment.h"
-//#include "Benchmark.h"
-//#include "HashRegion.h"
-#include "SmithWatermanGotoh.h"
-#include "BandedSmithWaterman.h"
-
-using namespace std;
-
-int main(int argc, char* argv[]) {
-/*
-	printf("------------------------------------------------------------------------------\n");
-	printf("Banded Smith-Waterman Algorithm (worst case)\n");
-	printf("Michael Stromberg & Wan-Ping Lee  Marth Lab, Boston College Biology Department\n");
-	printf("------------------------------------------------------------------------------\n\n");
-*/
-	// this version simulates the worst case of only a fragment hashing to the
-	// reference sequence. Basically a non-centered diagonal in the Smith-Waterman
-	// dynamic programming matrix.
-
-	// here we simulate a region on the reference that occurs between position 4001
-	// and position 4136. During hashing, only the first 20 bases in the query 
-	// matched perfectly.
-
-	// define the start and end coordinates of the entire reference region
-	//const unsigned int start = 4001;
-	//const unsigned int end   = 4136;
-
-	//const unsigned int testStart = atoi(argv[1]);
-	//const unsigned int testEnd = atoi(argv[2]);
-	//const unsigned int testQueryStart = atoi(argv[3]);
-	//const unsigned int testQueryEnd = atoi(argv[4]);
-	
-	//cout << endl<< "=====================================================" << endl;
-	//cout << testStart << "\t" << testQueryStart << endl;
-	
-	// define the 20 b:q
-	// ases that matched perfectly
-	//HashRegion hr;
-	
-	//=====================================================
-	// defind the hash region
-	// first.first:   reference begin
-	// first.second:  reference end
-	// second.first:  query begin
-	// second.second: query end
-	//=====================================================
-	
-	pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr;
-	hr.first.first   = 5;
-	hr.first.second  = 13;
-	hr.second.first  = 0;
-	hr.second.second = 8;
-
-	//=====================================================
-
-	// for 76 bp reads, we expect as much as 12 mismatches - however this does not
-	// translate to a bandwidth of 12 * 2 + 1 since most of these will be
-	// substitution errors
-	const unsigned char bandwidth = 11;
-
-	// initialize
-	const char* pReference = "ATGGCGGGGATCGGGACACTCGCCGGTGCGGGTACCCTA";
-	const char* pQuery     =      "GGGGATCGGGACACTCGCTCTCCGGTGCGGGTA";
-	
-	const unsigned int referenceLen = strlen(pReference);
-	const unsigned int queryLen     = strlen(pQuery);
-
-	// ==============================================================================================
-	// benchmarking reference on koi.bc.edu when NUM_ITERATIONS = 38000 on 76 bp read (1 try):
-	// CPU time: 23.920 s, wall time: 24.012 s (1582.6 alignments/s)
-	// ==============================================================================================
-	//const unsigned int NUM_ITERATIONS = 38000;
-	//unsigned int NUM_ITERATIONS = 1;
-
-	// create a new Smith-Waterman alignment object
-	CSmithWatermanGotoh sw(10.0f, -9.0f, 15.0f, 6.66f);
-	CBandedSmithWaterman bsw(10.0f, -9.0f, 15.0f, 6.66f, bandwidth);
-
-	// start timing the algorithm
-	//CBenchmark bench;
-	//bench.Start();
-
-	// perform NUM_ITERATIONS alignments
-	//Alignment bswAl;
-	//Alignment swAl;
-	//   referenceBegin, referenceEnd
-	unsigned int referenceSW, referenceBSW;
-	string cigarSW, cigarBSW;
-	//for(unsigned int i = 0; i < NUM_ITERATIONS; i++) {
-	  sw.Align(referenceSW, cigarSW, pReference, referenceLen, pQuery, queryLen);
-	  bsw.Align(referenceBSW, cigarBSW, pReference, referenceLen, pQuery, queryLen, hr);
-	//}
-
-	// stop timing the algorithm
-	//bench.Stop();
-	
-	// calculate the alignments per second
-	//double elapsedWallTime = bench.GetElapsedWallTime();
-	//double alignmentsPerSecond = (double)NUM_ITERATIONS / elapsedWallTime;
-	
-	// show our results
-	//printf("%d\t%d\n", al.ReferenceBegin,al.QueryBegin);
-
-	printf("Smith-Waterman\n");
-	printf("reference:    %s %3u\n", cigarSW.c_str(), referenceSW);
-	printf("Banded Smith-Waterman\n");
-	printf("reference:    %s %3u\n", cigarBSW.c_str(), referenceBSW);
-	/*  
-	printf("Smith-Waterman\n");
-	printf("reference:    %s %3u %3u\n", swAl.Reference.CData(), swAl.ReferenceBegin, swAl.ReferenceEnd);
-        printf("query:        %s %3u %3u\n", swAl.Query.CData(), swAl.QueryBegin, swAl.QueryEnd);
-        printf("mismatches:   %u\n", swAl.NumMismatches);
-	printf("\n");	
-	printf("Banded Smith-Waterman\n");
-	printf("reference:    %s %3u %3u\n", bswAl.Reference.CData(), bswAl.ReferenceBegin, bswAl.ReferenceEnd);
-        printf("query:        %s %3u %3u\n", bswAl.Query.CData(), bswAl.QueryBegin, bswAl.QueryEnd);
-        printf("mismatches:   %u\n", bswAl.NumMismatches);
-	*/
-	//printf("alignments/s: %.1f\n\n", alignmentsPerSecond);
-
-	//bench.DisplayTime("BandedSmithWaterman");
-
-	return 0;
-}


=====================================
external/vcflib/smithwaterman/SmithWatermanGotoh.cpp deleted
=====================================
@@ -1,741 +0,0 @@
-#include "SmithWatermanGotoh.h"
-
-const float CSmithWatermanGotoh::FLOAT_NEGATIVE_INFINITY = (float)-1e+30;
-
-const char CSmithWatermanGotoh::Directions_STOP     = 0;
-const char CSmithWatermanGotoh::Directions_LEFT     = 1;
-const char CSmithWatermanGotoh::Directions_DIAGONAL = 2;
-const char CSmithWatermanGotoh::Directions_UP       = 3;
-
-const int CSmithWatermanGotoh::repeat_size_max      = 12;
-
-CSmithWatermanGotoh::CSmithWatermanGotoh(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty) 
-    : mCurrentMatrixSize(0)
-    , mCurrentAnchorSize(0)
-    , mCurrentQuerySize(0)
-    , mCurrentAQSumSize(0)
-    , mMatchScore(matchScore)
-    , mMismatchScore(mismatchScore)
-    , mGapOpenPenalty(gapOpenPenalty)
-    , mGapExtendPenalty(gapExtendPenalty)
-    , mPointers(NULL)
-    , mSizesOfVerticalGaps(NULL)
-    , mSizesOfHorizontalGaps(NULL)
-    , mQueryGapScores(NULL)
-    , mBestScores(NULL)
-    , mReversedAnchor(NULL)
-    , mReversedQuery(NULL)
-    , mUseHomoPolymerGapOpenPenalty(false)
-    , mUseEntropyGapOpenPenalty(false)
-    , mUseRepeatGapExtensionPenalty(false)
-{
-    CreateScoringMatrix();
-}
-
-CSmithWatermanGotoh::~CSmithWatermanGotoh(void) {
-    if(mPointers)              delete [] mPointers;
-    if(mSizesOfVerticalGaps)   delete [] mSizesOfVerticalGaps;
-    if(mSizesOfHorizontalGaps) delete [] mSizesOfHorizontalGaps;
-    if(mQueryGapScores)        delete [] mQueryGapScores;
-    if(mBestScores)            delete [] mBestScores;
-    if(mReversedAnchor)        delete [] mReversedAnchor;
-    if(mReversedQuery)         delete [] mReversedQuery;
-}
-
-// aligns the query sequence to the reference using the Smith Waterman Gotoh algorithm
-void CSmithWatermanGotoh::Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2) {
-
-    if((s1.length() == 0) || (s2.length() == 0)) {
-	cout << "ERROR: Found a read with a zero length." << endl;
-	exit(1);
-    }
-
-    unsigned int referenceLen      = s1.length() + 1;
-    unsigned int queryLen          = s2.length() + 1;
-    unsigned int sequenceSumLength = s1.length() + s2.length();
-
-    // reinitialize our matrices
-
-    if((referenceLen * queryLen) > mCurrentMatrixSize) {
-
-	// calculate the new matrix size
-	mCurrentMatrixSize = referenceLen * queryLen;
-
-	// delete the old arrays
-	if(mPointers)              delete [] mPointers;
-	if(mSizesOfVerticalGaps)   delete [] mSizesOfVerticalGaps;
-	if(mSizesOfHorizontalGaps) delete [] mSizesOfHorizontalGaps;
-
-	try {
-
-	    // initialize the arrays
-	    mPointers              = new char[mCurrentMatrixSize];
-	    mSizesOfVerticalGaps   = new short[mCurrentMatrixSize];
-	    mSizesOfHorizontalGaps = new short[mCurrentMatrixSize];
-
-	} catch(bad_alloc) {
-	    cout << "ERROR: Unable to allocate enough memory for the Smith-Waterman algorithm." << endl;
-	    exit(1);
-	}
-    }
-
-    // initialize the traceback matrix to STOP
-    memset((char*)mPointers, 0, SIZEOF_CHAR * queryLen);
-    for(unsigned int i = 1; i < referenceLen; i++) mPointers[i * queryLen] = 0;
-
-    // initialize the gap matrices to 1
-    uninitialized_fill(mSizesOfVerticalGaps, mSizesOfVerticalGaps + mCurrentMatrixSize, 1);
-    uninitialized_fill(mSizesOfHorizontalGaps, mSizesOfHorizontalGaps + mCurrentMatrixSize, 1);
-
-
-    // initialize our repeat counts if they are needed
-    vector<map<string, int> > referenceRepeats;
-    vector<map<string, int> > queryRepeats;
-    //int queryBeginRepeatBases = 0;
-    //int queryEndRepeatBases = 0;
-    if (mUseRepeatGapExtensionPenalty) {
-	for (unsigned int i = 0; i < queryLen; ++i)
-	    queryRepeats.push_back(repeatCounts(i, s2, repeat_size_max));
-	for (unsigned int i = 0; i < referenceLen; ++i)
-	    referenceRepeats.push_back(repeatCounts(i, s1, repeat_size_max));
-
-	// keep only the biggest repeat
-	vector<map<string, int> >::iterator q = queryRepeats.begin();
-	for (; q != queryRepeats.end(); ++q) {
-	    map<string, int>::iterator biggest = q->begin();
-	    map<string, int>::iterator z = q->begin();
-	    for (; z != q->end(); ++z)
-		if (z->first.size() > biggest->first.size()) biggest = z;
-	    z = q->begin();
-	    while (z != q->end()) {
-		if (z != biggest)
-		    q->erase(z++);
-		else ++z;
-	    }
-	}
-
-	q = referenceRepeats.begin();
-	for (; q != referenceRepeats.end(); ++q) {
-	    map<string, int>::iterator biggest = q->begin();
-	    map<string, int>::iterator z = q->begin();
-	    for (; z != q->end(); ++z)
-		if (z->first.size() > biggest->first.size()) biggest = z;
-	    z = q->begin();
-	    while (z != q->end()) {
-		if (z != biggest)
-		    q->erase(z++);
-		else ++z;
-	    }
-	}
-
-	// remove repeat information from ends of queries
-	// this results in the addition of spurious flanking deletions in repeats
-	map<string, int>& qrend = queryRepeats.at(queryRepeats.size() - 2);
-	if (!qrend.empty()) {
-	    int queryEndRepeatBases = qrend.begin()->first.size() * qrend.begin()->second;
-	    for (int i = 0; i < queryEndRepeatBases; ++i)
-		queryRepeats.at(queryRepeats.size() - 2 - i).clear();
-	}
-
-	map<string, int>& qrbegin = queryRepeats.front();
-	if (!qrbegin.empty()) {
-	    int queryBeginRepeatBases = qrbegin.begin()->first.size() * qrbegin.begin()->second;
-	    for (int i = 0; i < queryBeginRepeatBases; ++i)
-		queryRepeats.at(i).clear();
-	}
-
-    }
-
-    //int entropyWindowSize = 8;
-    vector<float> referenceEntropies;
-    vector<float> queryEntropies;
-    /*if (mUseEntropyGapOpenPenalty) {
-	for (unsigned int i = 0; i < queryLen; ++i)
-	    queryEntropies.push_back(
-		shannon_H((char*) &s2[max(0, min((int) i - entropyWindowSize / 2, (int) queryLen - entropyWindowSize - 1))],
-			  entropyWindowSize));
-	for (unsigned int i = 0; i < referenceLen; ++i)
-	    referenceEntropies.push_back(
-		shannon_H((char*) &s1[max(0, min((int) i - entropyWindowSize / 2, (int) referenceLen - entropyWindowSize - 1))],
-			  entropyWindowSize));
-    }
-    */
-    // normalize entropies
-    /*
-    float qsum = 0;
-    float qnorm = 0;
-    float qmax = 0;
-    for (vector<float>::iterator q = queryEntropies.begin(); q != queryEntropies.end(); ++q) {
-	qsum += *q;
-	if (*q > qmax) qmax = *q;
-    }
-    qnorm = qsum / queryEntropies.size();
-    for (vector<float>::iterator q = queryEntropies.begin(); q != queryEntropies.end(); ++q)
-	*q = *q / qsum + qmax;
-
-    float rsum = 0;
-    float rnorm = 0;
-    float rmax = 0;
-    for (vector<float>::iterator r = referenceEntropies.begin(); r != referenceEntropies.end(); ++r) {
-	rsum += *r;
-	if (*r > rmax) rmax = *r;
-    }
-    rnorm = rsum / referenceEntropies.size();
-    for (vector<float>::iterator r = referenceEntropies.begin(); r != referenceEntropies.end(); ++r)
-	*r = *r / rsum + rmax;
-    */
-
-    //
-    // construct
-    //
-
-    // reinitialize our query-dependent arrays
-    if(s2.length() > mCurrentQuerySize) {
-
-	// calculate the new query array size
-	mCurrentQuerySize = s2.length();
-
-	// delete the old arrays
-	if(mQueryGapScores) delete [] mQueryGapScores;
-	if(mBestScores)     delete [] mBestScores;
-
-	// initialize the arrays
-	try {
-
-	    mQueryGapScores = new float[mCurrentQuerySize + 1];
-	    mBestScores     = new float[mCurrentQuerySize + 1];
-
-	} catch(bad_alloc) {
-	    cout << "ERROR: Unable to allocate enough memory for the Smith-Waterman algorithm." << endl;
-	    exit(1);
-	}
-    }
-
-    // reinitialize our reference+query-dependent arrays
-    if(sequenceSumLength > mCurrentAQSumSize) {
-
-	// calculate the new reference array size
-	mCurrentAQSumSize = sequenceSumLength;
-
-	// delete the old arrays
-	if(mReversedAnchor) delete [] mReversedAnchor;
-	if(mReversedQuery)  delete [] mReversedQuery;
-
-	// initialize the arrays
-	try {
-
-	    mReversedAnchor = new char[mCurrentAQSumSize + 1];	// reversed sequence #1
-	    mReversedQuery  = new char[mCurrentAQSumSize + 1];	// reversed sequence #2
-
-	} catch(bad_alloc) {
-	    cout << "ERROR: Unable to allocate enough memory for the Smith-Waterman algorithm." << endl;
-	    exit(1);
-	}
-    }
-
-    // initialize the gap score and score vectors
-    uninitialized_fill(mQueryGapScores, mQueryGapScores + queryLen, FLOAT_NEGATIVE_INFINITY);
-    memset((char*)mBestScores, 0, SIZEOF_FLOAT * queryLen);
-
-    float similarityScore, totalSimilarityScore, bestScoreDiagonal;
-    float queryGapExtendScore, queryGapOpenScore;
-    float referenceGapExtendScore, referenceGapOpenScore, currentAnchorGapScore;
-
-    unsigned int BestColumn = 0;
-    unsigned int BestRow    = 0;
-    float BestScore         = FLOAT_NEGATIVE_INFINITY;
-
-    for(unsigned int i = 1, k = queryLen; i < referenceLen; i++, k += queryLen) {
-
-	currentAnchorGapScore = FLOAT_NEGATIVE_INFINITY;
-	bestScoreDiagonal = mBestScores[0];
-
-	for(unsigned int j = 1, l = k + 1; j < queryLen; j++, l++) {
-
-	    // calculate our similarity score
-	    similarityScore = mScoringMatrix[s1[i - 1] - 'A'][s2[j - 1] - 'A'];
-
-	    // fill the matrices
-	    totalSimilarityScore = bestScoreDiagonal + similarityScore;
-	    
-	    //cerr << "i: " << i << ", j: " << j << ", totalSimilarityScore: " << totalSimilarityScore << endl;
-
-	    queryGapExtendScore = mQueryGapScores[j] - mGapExtendPenalty;
-	    queryGapOpenScore   = mBestScores[j] - mGapOpenPenalty;
-	    
-	    // compute the homo-polymer gap score if enabled
-	    if(mUseHomoPolymerGapOpenPenalty)
-		if((j > 1) && (s2[j - 1] == s2[j - 2]))
-		    queryGapOpenScore = mBestScores[j] - mHomoPolymerGapOpenPenalty;
-	    
-	    // compute the entropy gap score if enabled
-	    if (mUseEntropyGapOpenPenalty) {
-		queryGapOpenScore = 
-		    mBestScores[j] - mGapOpenPenalty 
-		    * max(queryEntropies.at(j), referenceEntropies.at(i))
-		    * mEntropyGapOpenPenalty;
-	    }
-
-	    int gaplen = mSizesOfVerticalGaps[l - queryLen] + 1;
-
-	    if (mUseRepeatGapExtensionPenalty) {
-		map<string, int>& repeats = queryRepeats[j];
-		// does the sequence which would be inserted or deleted in this gap match the repeat structure which it is embedded in?
-		if (!repeats.empty()) {
-
-		    const pair<string, int>& repeat = *repeats.begin();
-		    int repeatsize = repeat.first.size();
-		    if (gaplen != repeatsize && gaplen % repeatsize != 0) {
-			gaplen = gaplen / repeatsize + repeatsize;
-		    }
-
-		    if ((repeat.first.size() * repeat.second) > 3 && gaplen + i < s1.length()) {
-			string gapseq = string(&s1[i], gaplen);
-			if (gapseq == repeat.first || isRepeatUnit(gapseq, repeat.first)) {
-			    queryGapExtendScore = mQueryGapScores[j]
-				+ mRepeatGapExtensionPenalty / (float) gaplen;
-				//    mMaxRepeatGapExtensionPenalty)
-			} else {
-			    queryGapExtendScore = mQueryGapScores[j] - mGapExtendPenalty;
-			}
-		    }
-		} else {
-		    queryGapExtendScore = mQueryGapScores[j] - mGapExtendPenalty;
-		}
-	    }
-		  
-	    if(queryGapExtendScore > queryGapOpenScore) {
-		mQueryGapScores[j] = queryGapExtendScore;
-		mSizesOfVerticalGaps[l] = gaplen;
-	    } else mQueryGapScores[j] = queryGapOpenScore;
-	    
-	    referenceGapExtendScore = currentAnchorGapScore - mGapExtendPenalty;
-	    referenceGapOpenScore   = mBestScores[j - 1] - mGapOpenPenalty;
-		  
-	    // compute the homo-polymer gap score if enabled
-	    if(mUseHomoPolymerGapOpenPenalty)
-		if((i > 1) && (s1[i - 1] == s1[i - 2]))
-		    referenceGapOpenScore = mBestScores[j - 1] - mHomoPolymerGapOpenPenalty;
-		  
-	    // compute the entropy gap score if enabled
-	    if (mUseEntropyGapOpenPenalty) {
-		referenceGapOpenScore = 
-		    mBestScores[j - 1] - mGapOpenPenalty 
-		    * max(queryEntropies.at(j), referenceEntropies.at(i))
-		    * mEntropyGapOpenPenalty;
-	    }
-
-	    gaplen = mSizesOfHorizontalGaps[l - 1] + 1;
-
-	    if (mUseRepeatGapExtensionPenalty) {
-		map<string, int>& repeats = referenceRepeats[i];
-		// does the sequence which would be inserted or deleted in this gap match the repeat structure which it is embedded in?
-		if (!repeats.empty()) {
-
-		    const pair<string, int>& repeat = *repeats.begin();
-		    int repeatsize = repeat.first.size();
-		    if (gaplen != repeatsize && gaplen % repeatsize != 0) {
-			gaplen = gaplen / repeatsize + repeatsize;
-		    }
-
-		    if ((repeat.first.size() * repeat.second) > 3 && gaplen + j < s2.length()) {
-			string gapseq = string(&s2[j], gaplen);
-			if (gapseq == repeat.first || isRepeatUnit(gapseq, repeat.first)) {
-			    referenceGapExtendScore = currentAnchorGapScore
-				+ mRepeatGapExtensionPenalty / (float) gaplen;
-				//mMaxRepeatGapExtensionPenalty)
-			} else {
-			    referenceGapExtendScore = currentAnchorGapScore - mGapExtendPenalty;
-			}
-		    }
-		} else {
-		    referenceGapExtendScore = currentAnchorGapScore - mGapExtendPenalty;
-		}
-	    }
-
-	    if(referenceGapExtendScore > referenceGapOpenScore) {
-		currentAnchorGapScore = referenceGapExtendScore;
-		mSizesOfHorizontalGaps[l] = gaplen;
-	    } else currentAnchorGapScore = referenceGapOpenScore;
-		  
-	    bestScoreDiagonal = mBestScores[j];
-	    mBestScores[j] = MaxFloats(totalSimilarityScore, mQueryGapScores[j], currentAnchorGapScore);
-		  
-		  
-	    // determine the traceback direction
-	    // diagonal (445364713) > stop (238960195) > up (214378647) > left (166504495)
-	    if(mBestScores[j] == 0)                         mPointers[l] = Directions_STOP;
-	    else if(mBestScores[j] == totalSimilarityScore) mPointers[l] = Directions_DIAGONAL;
-	    else if(mBestScores[j] == mQueryGapScores[j])   mPointers[l] = Directions_UP;
-	    else                                            mPointers[l] = Directions_LEFT;
-		  
-	    // set the traceback start at the current cell i, j and score
-	    if(mBestScores[j] > BestScore) {
-		BestRow    = i;
-		BestColumn = j;
-		BestScore  = mBestScores[j];
-	    }
-	}
-    }
-
-    //
-    // traceback
-    //
-
-    // aligned sequences
-    int gappedAnchorLen  = 0;   // length of sequence #1 after alignment
-    int gappedQueryLen   = 0;   // length of sequence #2 after alignment
-    int numMismatches    = 0;   // the mismatched nucleotide count
-
-    char c1, c2;
-
-    int ci = BestRow;
-    int cj = BestColumn;
-    int ck = ci * queryLen;
-
-    // traceback flag
-    bool keepProcessing = true;
-
-    while(keepProcessing) {
-	//cerr << ci << " " << cj << " " << ck << "  ... " << gappedAnchorLen << " " << gappedQueryLen <<  endl;
-
-	// diagonal (445364713) > stop (238960195) > up (214378647) > left (166504495)
-	switch(mPointers[ck + cj]) {
-
-	case Directions_DIAGONAL:
-	    c1 = s1[--ci];
-	    c2 = s2[--cj];
-	    ck -= queryLen;
-
-	    mReversedAnchor[gappedAnchorLen++] = c1;
-	    mReversedQuery[gappedQueryLen++]   = c2;
-
-	    // increment our mismatch counter
-	    if(mScoringMatrix[c1 - 'A'][c2 - 'A'] == mMismatchScore) numMismatches++;	
-	    break;
-
-	case Directions_STOP:
-	    keepProcessing = false;
-	    break;
-
-	case Directions_UP:
-	    for(unsigned int l = 0, len = mSizesOfVerticalGaps[ck + cj]; l < len; l++) {
-		if (ci <= 0) {
-		    keepProcessing = false;
-		    break;
-		}
-		mReversedAnchor[gappedAnchorLen++] = s1[--ci];
-		mReversedQuery[gappedQueryLen++]   = GAP;
-		ck -= queryLen;
-		numMismatches++;
-	    }
-	    break;
-
-	case Directions_LEFT:
-	    for(unsigned int l = 0, len = mSizesOfHorizontalGaps[ck + cj]; l < len; l++) {
-		if (cj <= 0) {
-		    keepProcessing = false;
-		    break;
-		}
-		mReversedAnchor[gappedAnchorLen++] = GAP;
-		mReversedQuery[gappedQueryLen++]   = s2[--cj];
-		numMismatches++;
-	    }
-	    break;
-	}
-    }
-
-    // define the reference and query sequences
-    mReversedAnchor[gappedAnchorLen] = 0;
-    mReversedQuery[gappedQueryLen]   = 0;
-
-    // catch sequences with different lengths
-    if(gappedAnchorLen != gappedQueryLen) {
-	cout << "ERROR: The aligned sequences have different lengths after Smith-Waterman-Gotoh algorithm." << endl;
-	exit(1);
-    }
-
-    // reverse the strings and assign them to our alignment structure
-    reverse(mReversedAnchor, mReversedAnchor + gappedAnchorLen);
-    reverse(mReversedQuery,  mReversedQuery  + gappedQueryLen);
-
-    //alignment.Reference = mReversedAnchor;
-    //alignment.Query     = mReversedQuery;
-
-    // set the reference endpoints
-    //alignment.ReferenceBegin = ci;
-    //alignment.ReferenceEnd   = BestRow - 1;
-    referenceAl = ci;
-
-    // set the query endpoints
-    /*  
-	if(alignment.IsReverseComplement) {
-	alignment.QueryBegin = s2Length - BestColumn;
-	alignment.QueryEnd   = s2Length - cj - 1;
-	// alignment.QueryLength= alignment.QueryBegin - alignment.QueryEnd + 1;
-	} else {
-	alignment.QueryBegin = cj;
-	alignment.QueryEnd   = BestColumn - 1;
-	// alignment.QueryLength= alignment.QueryEnd - alignment.QueryBegin + 1;
-	}
-    */
-
-    // set the query length and number of mismatches
-    //alignment.QueryLength = alignment.QueryEnd - alignment.QueryBegin + 1;
-    //alignment.NumMismatches  = numMismatches;
-
-    unsigned int alLength = strlen(mReversedAnchor);
-    unsigned int m = 0, d = 0, i = 0;
-    bool dashRegion = false;
-    ostringstream oCigar (ostringstream::out);
-    int insertedBases = 0;
-
-    if ( cj != 0 ) {
-	if ( cj > 0 ) {
-	    oCigar << cj << 'S';
-	} else { // how do we get negative cj's?
-	    referenceAl -= cj;
-	    alLength += cj;
-	}
-    }
-	
-    for ( unsigned int j = 0; j < alLength; j++ ) {
-	// m
-	if ( ( mReversedAnchor[j] != GAP ) && ( mReversedQuery[j] != GAP ) ) {
-	    if ( dashRegion ) {
-		if ( d != 0 ) oCigar << d << 'D';
-		else          { oCigar << i << 'I'; insertedBases += i; }
-	    }
-	    dashRegion = false;
-	    m++;
-	    d = 0;
-	    i = 0;
-	}
-	else {
-	    if ( !dashRegion && m )
-		oCigar << m << 'M';
-	    dashRegion = true;
-	    m = 0;
-	    if ( mReversedAnchor[j] == GAP ) {
-		if ( d != 0 ) oCigar << d << 'D';
-		i++;
-		d = 0;
-	    }
-	    else {
-		if ( i != 0) { oCigar << i << 'I'; insertedBases += i; }
-		d++;
-		i = 0;
-	    }
-	}
-    }
-    if      ( m != 0 ) oCigar << m << 'M';
-    else if ( d != 0 ) oCigar << d << 'D';
-    else if ( i != 0 ) oCigar << i << 'I';
-
-    if ( BestColumn != s2.length() )
-	oCigar << s2.length() - BestColumn << 'S';
-
-    cigarAl = oCigar.str();
-
-    // fix the gap order
-    CorrectHomopolymerGapOrder(alLength, numMismatches);
-
-    if (mUseEntropyGapOpenPenalty || mUseRepeatGapExtensionPenalty) {
-	int offset = 0;
-	string oldCigar;
-	try {
-	    oldCigar = cigarAl;
-	    stablyLeftAlign(s2, cigarAl, s1.substr(referenceAl, alLength - insertedBases), offset);
-	} catch (...) {
-	    cerr << "an exception occurred when left-aligning " << s1 << " " << s2 << endl;
-	    cigarAl = oldCigar; // undo the failed left-realignment attempt
-	    offset = 0;
-	}
-	referenceAl += offset;
-    }
-
-}
-
-// creates a simple scoring matrix to align the nucleotides and the ambiguity code N
-void CSmithWatermanGotoh::CreateScoringMatrix(void) {
-
-    unsigned int nIndex = 13;
-    unsigned int xIndex = 23;
-
-    // define the N score to be 1/4 of the span between mismatch and match
-    //const short nScore = mMismatchScore + (short)(((mMatchScore - mMismatchScore) / 4.0) + 0.5);
-
-    // calculate the scoring matrix
-    for(unsigned char i = 0; i < MOSAIK_NUM_NUCLEOTIDES; i++) {
-	for(unsigned char j = 0; j < MOSAIK_NUM_NUCLEOTIDES; j++) {
-
-	    // N.B. matching N to everything (while conceptually correct) leads to some
-	    // bad alignments, lets make N be a mismatch instead.
-
-	    // add the matches or mismatches to the hashtable (N is a mismatch)
-	    if((i == nIndex) || (j == nIndex)) mScoringMatrix[i][j] = mMismatchScore;
-	    else if((i == xIndex) || (j == xIndex)) mScoringMatrix[i][j] = mMismatchScore;
-	    else if(i == j) mScoringMatrix[i][j] = mMatchScore;
-	    else mScoringMatrix[i][j] = mMismatchScore;
-	}
-    }
-
-    // add ambiguity codes
-    mScoringMatrix['M' - 'A']['A' - 'A'] = mMatchScore;	// M - A
-    mScoringMatrix['A' - 'A']['M' - 'A'] = mMatchScore;
-    mScoringMatrix['M' - 'A']['C' - 'A'] = mMatchScore; // M - C
-    mScoringMatrix['C' - 'A']['M' - 'A'] = mMatchScore;
-
-    mScoringMatrix['R' - 'A']['A' - 'A'] = mMatchScore;	// R - A
-    mScoringMatrix['A' - 'A']['R' - 'A'] = mMatchScore;
-    mScoringMatrix['R' - 'A']['G' - 'A'] = mMatchScore; // R - G
-    mScoringMatrix['G' - 'A']['R' - 'A'] = mMatchScore;
-
-    mScoringMatrix['W' - 'A']['A' - 'A'] = mMatchScore;	// W - A
-    mScoringMatrix['A' - 'A']['W' - 'A'] = mMatchScore;
-    mScoringMatrix['W' - 'A']['T' - 'A'] = mMatchScore; // W - T
-    mScoringMatrix['T' - 'A']['W' - 'A'] = mMatchScore;
-
-    mScoringMatrix['S' - 'A']['C' - 'A'] = mMatchScore;	// S - C
-    mScoringMatrix['C' - 'A']['S' - 'A'] = mMatchScore;
-    mScoringMatrix['S' - 'A']['G' - 'A'] = mMatchScore; // S - G
-    mScoringMatrix['G' - 'A']['S' - 'A'] = mMatchScore;
-
-    mScoringMatrix['Y' - 'A']['C' - 'A'] = mMatchScore;	// Y - C
-    mScoringMatrix['C' - 'A']['Y' - 'A'] = mMatchScore;
-    mScoringMatrix['Y' - 'A']['T' - 'A'] = mMatchScore; // Y - T
-    mScoringMatrix['T' - 'A']['Y' - 'A'] = mMatchScore;
-
-    mScoringMatrix['K' - 'A']['G' - 'A'] = mMatchScore;	// K - G
-    mScoringMatrix['G' - 'A']['K' - 'A'] = mMatchScore;
-    mScoringMatrix['K' - 'A']['T' - 'A'] = mMatchScore; // K - T
-    mScoringMatrix['T' - 'A']['K' - 'A'] = mMatchScore;
-
-    mScoringMatrix['V' - 'A']['A' - 'A'] = mMatchScore;	// V - A
-    mScoringMatrix['A' - 'A']['V' - 'A'] = mMatchScore;
-    mScoringMatrix['V' - 'A']['C' - 'A'] = mMatchScore; // V - C
-    mScoringMatrix['C' - 'A']['V' - 'A'] = mMatchScore;
-    mScoringMatrix['V' - 'A']['G' - 'A'] = mMatchScore; // V - G
-    mScoringMatrix['G' - 'A']['V' - 'A'] = mMatchScore;
-
-    mScoringMatrix['H' - 'A']['A' - 'A'] = mMatchScore;	// H - A
-    mScoringMatrix['A' - 'A']['H' - 'A'] = mMatchScore;
-    mScoringMatrix['H' - 'A']['C' - 'A'] = mMatchScore; // H - C
-    mScoringMatrix['C' - 'A']['H' - 'A'] = mMatchScore;
-    mScoringMatrix['H' - 'A']['T' - 'A'] = mMatchScore; // H - T
-    mScoringMatrix['T' - 'A']['H' - 'A'] = mMatchScore;
-
-    mScoringMatrix['D' - 'A']['A' - 'A'] = mMatchScore;	// D - A
-    mScoringMatrix['A' - 'A']['D' - 'A'] = mMatchScore;
-    mScoringMatrix['D' - 'A']['G' - 'A'] = mMatchScore; // D - G
-    mScoringMatrix['G' - 'A']['D' - 'A'] = mMatchScore;
-    mScoringMatrix['D' - 'A']['T' - 'A'] = mMatchScore; // D - T
-    mScoringMatrix['T' - 'A']['D' - 'A'] = mMatchScore;
-
-    mScoringMatrix['B' - 'A']['C' - 'A'] = mMatchScore;	// B - C
-    mScoringMatrix['C' - 'A']['B' - 'A'] = mMatchScore;
-    mScoringMatrix['B' - 'A']['G' - 'A'] = mMatchScore; // B - G
-    mScoringMatrix['G' - 'A']['B' - 'A'] = mMatchScore;
-    mScoringMatrix['B' - 'A']['T' - 'A'] = mMatchScore; // B - T
-    mScoringMatrix['T' - 'A']['B' - 'A'] = mMatchScore;
-}
-
-// enables homo-polymer scoring
-void CSmithWatermanGotoh::EnableHomoPolymerGapPenalty(float hpGapOpenPenalty) {
-    mUseHomoPolymerGapOpenPenalty = true;
-    mHomoPolymerGapOpenPenalty    = hpGapOpenPenalty;
-}
-
-// enables entropy-based gap open penalty
-void CSmithWatermanGotoh::EnableEntropyGapPenalty(float enGapOpenPenalty) {
-    mUseEntropyGapOpenPenalty = true;
-    mEntropyGapOpenPenalty    = enGapOpenPenalty;
-}
-
-// enables repeat-aware gap extension penalty
-void CSmithWatermanGotoh::EnableRepeatGapExtensionPenalty(float rGapExtensionPenalty, float rMaxGapRepeatExtensionPenaltyFactor) {
-    mUseRepeatGapExtensionPenalty = true;
-    mRepeatGapExtensionPenalty    = rGapExtensionPenalty;
-    mMaxRepeatGapExtensionPenalty = rMaxGapRepeatExtensionPenaltyFactor * rGapExtensionPenalty;
-}
-
-// corrects the homopolymer gap order for forward alignments
-void CSmithWatermanGotoh::CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches) {
-
-    // this is only required for alignments with mismatches
-    //if(al.NumMismatches == 0) return;
-    if ( numMismatches == 0 ) return;
-
-    // localize the alignment data
-    //char* pReference = al.Reference.Data();
-    //char* pQuery     = al.Query.Data();
-    //const unsigned int numBases = al.Reference.Length();
-    char* pReference = mReversedAnchor;
-    char* pQuery     = mReversedQuery;
-
-    // initialize
-    bool hasReferenceGap = false, hasQueryGap = false;
-    char* pNonGapSeq = NULL;
-    char* pGapSeq    = NULL;
-    char nonGapBase  = 'J';
-
-    // identify gapped regions
-    for(unsigned int i = 0; i < numBases; i++) {
-
-	// check if the current position is gapped
-	hasReferenceGap = false;
-	hasQueryGap     = false;
-
-	if(pReference[i] == GAP) {
-	    hasReferenceGap = true;
-	    pNonGapSeq      = pQuery;
-	    pGapSeq         = pReference;
-	    nonGapBase      = pQuery[i];
-	}
-
-	if(pQuery[i] == GAP) {
-	    hasQueryGap = true;
-	    pNonGapSeq  = pReference;
-	    pGapSeq     = pQuery;
-	    nonGapBase  = pReference[i];
-	}
-
-	// continue if we don't have any gaps
-	if(!hasReferenceGap && !hasQueryGap) continue;
-
-	// sanity check
-	if(hasReferenceGap && hasQueryGap) {
-	    printf("ERROR: Found a gap in both the reference sequence and query sequence.\n");
-	    exit(1);
-	}
-
-	// find the non-gapped length (forward)
-	unsigned short numGappedBases = 0;
-	unsigned short nonGapLength   = 0;
-	unsigned short testPos = i;
-	while(testPos < numBases) {
-
-	    const char gs  = pGapSeq[testPos];
-	    const char ngs = pNonGapSeq[testPos];
-
-	    bool isPartofHomopolymer = false;
-	    if(((gs == nonGapBase) || (gs == GAP)) && (ngs == nonGapBase)) isPartofHomopolymer = true;
-	    if(!isPartofHomopolymer) break;
-
-	    if(gs == GAP) numGappedBases++;
-	    else nonGapLength++;
-	    testPos++;
-	}
-
-	// fix the gap order
-	if(numGappedBases != 0) {
-	    char* pCurrentSequence = pGapSeq + i;
-	    memset(pCurrentSequence, nonGapBase, nonGapLength);
-	    pCurrentSequence += nonGapLength;
-	    memset(pCurrentSequence, GAP, numGappedBases);
-	}
-
-	// increment
-	i += numGappedBases + nonGapLength - 1;
-    }
-}


=====================================
external/vcflib/smithwaterman/SmithWatermanGotoh.h deleted
=====================================
@@ -1,100 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <algorithm>
-#include <memory>
-//#include "Alignment.h"
-#include "Mosaik.h"
-#include <stdio.h>
-#include <string.h>
-#include <sstream>
-#include <string>
-#include "Repeats.h"
-#include "LeftAlign.h"
-
-using namespace std;
-
-#define MOSAIK_NUM_NUCLEOTIDES 26
-#define GAP '-'
-
-class CSmithWatermanGotoh {
-public:
-    // constructor
-    CSmithWatermanGotoh(float matchScore, float mismatchScore, float gapOpenPenalty, float gapExtendPenalty);
-    // destructor
-    ~CSmithWatermanGotoh(void);
-    // aligns the query sequence to the reference using the Smith Waterman Gotoh algorithm
-    void Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2);
-    // enables homo-polymer scoring
-    void EnableHomoPolymerGapPenalty(float hpGapOpenPenalty);
-    // enables non-repeat gap open penalty
-    void EnableEntropyGapPenalty(float enGapOpenPenalty);
-    // enables repeat gap extension penalty
-    void EnableRepeatGapExtensionPenalty(float rGapExtensionPenalty, float rMaxGapRepeatExtensionPenaltyFactor = 10);
-private:
-    // creates a simple scoring matrix to align the nucleotides and the ambiguity code N
-    void CreateScoringMatrix(void);
-    // corrects the homopolymer gap order for forward alignments
-    void CorrectHomopolymerGapOrder(const unsigned int numBases, const unsigned int numMismatches);
-    // returns the maximum floating point number
-    static inline float MaxFloats(const float& a, const float& b, const float& c);
-    // our simple scoring matrix
-    float mScoringMatrix[MOSAIK_NUM_NUCLEOTIDES][MOSAIK_NUM_NUCLEOTIDES];
-    // keep track of maximum initialized sizes
-    unsigned int mCurrentMatrixSize;
-    unsigned int mCurrentAnchorSize;
-    unsigned int mCurrentQuerySize;
-    unsigned int mCurrentAQSumSize;
-    // define our traceback directions
-    // N.B. This used to be defined as an enum, but gcc doesn't like being told
-    // which storage class to use
-    const static char Directions_STOP;
-    const static char Directions_LEFT;
-    const static char Directions_DIAGONAL;
-    const static char Directions_UP;
-    // repeat structure determination
-    const static int repeat_size_max;
-    // define scoring constants
-    const float mMatchScore;
-    const float mMismatchScore;
-    const float mGapOpenPenalty;
-    const float mGapExtendPenalty;
-    // store the backtrace pointers
-    char* mPointers;
-    // store the vertical gap sizes - assuming gaps are not longer than 32768 bases long
-    short* mSizesOfVerticalGaps;
-    // store the horizontal gap sizes - assuming gaps are not longer than 32768 bases long
-    short* mSizesOfHorizontalGaps;	
-    // score if xi aligns to a gap after yi
-    float* mQueryGapScores;
-    // best score of alignment x1...xi to y1...yi
-    float* mBestScores;
-    // our reversed alignment
-    char* mReversedAnchor;
-    char* mReversedQuery;
-    // define static constants
-    static const float FLOAT_NEGATIVE_INFINITY;
-    // toggles the use of the homo-polymer gap open penalty
-    bool mUseHomoPolymerGapOpenPenalty;
-    // specifies the homo-polymer gap open penalty
-    float mHomoPolymerGapOpenPenalty;
-    // toggles the use of the entropy gap open penalty
-    bool mUseEntropyGapOpenPenalty;
-    // specifies the entropy gap open penalty (multiplier)
-    float mEntropyGapOpenPenalty;
-    // toggles the use of the repeat gap extension penalty
-    bool mUseRepeatGapExtensionPenalty;
-    // specifies the repeat gap extension penalty
-    float mRepeatGapExtensionPenalty;
-    // specifies the max repeat gap extension penalty
-    float mMaxRepeatGapExtensionPenalty;
-};
-
-// returns the maximum floating point number
-inline float CSmithWatermanGotoh::MaxFloats(const float& a, const float& b, const float& c) {
-    float max = 0.0f;
-    if(a > max) max = a;
-    if(b > max) max = b;
-    if(c > max) max = c;
-    return max;
-}


=====================================
external/vcflib/smithwaterman/convert.h deleted
=====================================
@@ -1,22 +0,0 @@
-#ifndef __CONVERT_H
-#define __CONVERT_H
-
-#include <sstream>
-
-// converts the string into the specified type, setting r to the converted
-// value and returning true/false on success or failure
-template<typename T>
-bool convert(const std::string& s, T& r) {
-    std::istringstream iss(s);
-    iss >> r;
-    return iss.eof() ? true : false;
-}
-
-template<typename T>
-std::string convert(const T& r) {
-    std::ostringstream iss;
-    iss << r;
-    return iss.str();
-}
-
-#endif


=====================================
external/vcflib/smithwaterman/smithwaterman.cpp deleted
=====================================
@@ -1,246 +0,0 @@
-#include <iostream>
-#include <string.h>
-#include <string>
-#include <sstream>
-#include <getopt.h>
-#include <utility>
-#include <vector>
-#include <stdlib.h>
-#include "SmithWatermanGotoh.h"
-#include "BandedSmithWaterman.h"
-
-using namespace std;
-
-void printSummary(void) {
-    cerr << "usage: smithwaterman [options] <reference sequence> <query sequence>" << endl
-         << endl
-         << "options:" << endl 
-         << "    -m, --match-score         the match score (default 10.0)" << endl
-         << "    -n, --mismatch-score      the mismatch score (default -9.0)" << endl
-         << "    -g, --gap-open-penalty    the gap open penalty (default 15.0)" << endl
-	 << "    -z, --entropy-gap-open-penalty  enable entropy scaling of the gap open penalty" << endl
-         << "    -e, --gap-extend-penalty  the gap extend penalty (default 6.66)" << endl
-	 << "    -r, --repeat-gap-extend-penalty  use repeat information when generating gap extension penalties" << endl
-         << "    -b, --bandwidth           bandwidth to use (default 0, or non-banded algorithm)" << endl
-         << "    -p, --print-alignment     print out the alignment" << endl
-         << endl
-         << "When called with literal reference and query sequences, smithwaterman" << endl
-         << "prints the cigar match positional string and the match position for the" << endl
-         << "query sequence against the reference sequence." << endl;
-}
-
-
-int main (int argc, char** argv) {
-
-    int c;
-
-    string reference;
-    string query;
-
-    int bandwidth = 0;
-
-    float matchScore = 10.0f;
-    float mismatchScore = -9.0f;
-    float gapOpenPenalty = 15.0f;
-    float gapExtendPenalty = 6.66f;
-    float entropyGapOpenPenalty = 0.0f;
-    bool useRepeatGapExtendPenalty = false;
-    float repeatGapExtendPenalty = 1.0f;
-    
-    bool print_alignment = false;
-
-    while (true) {
-        static struct option long_options[] =
-        {
-            {"help", no_argument, 0, 'h'},
-            {"match-score",  required_argument, 0, 'm'},
-            {"mismatch-score",  required_argument, 0, 'n'},
-            {"gap-open-penalty",  required_argument, 0, 'g'},
-            {"entropy-gap-open-penalty",  required_argument, 0, 'z'},
-            {"gap-extend-penalty",  required_argument, 0, 'e'},
-            {"repeat-gap-extend-penalty",  required_argument, 0, 'r'},
-            {"print-alignment",  required_argument, 0, 'p'},
-            {"bandwidth", required_argument, 0, 'b'},
-            {0, 0, 0, 0}
-        };
-        int option_index = 0;
-
-        c = getopt_long (argc, argv, "hpzm:n:g:r:e:b:r:",
-                         long_options, &option_index);
-
-          if (c == -1)
-            break;
- 
-          switch (c)
-            {
-            case 0:
-            /* If this option set a flag, do nothing else now. */
-            if (long_options[option_index].flag != 0)
-              break;
-            printf ("option %s", long_options[option_index].name);
-            if (optarg)
-              printf (" with arg %s", optarg);
-            printf ("\n");
-            break;
-
-          case 'm':
-            matchScore = atof(optarg);
-            break;
- 
-          case 'n':
-            mismatchScore = atof(optarg);
-            break;
- 
-          case 'g':
-            gapOpenPenalty = atof(optarg);
-            break;
-
-	  case 'z':
- 	    entropyGapOpenPenalty = 1;
-	    break;
- 
-	  case 'r':
-	    useRepeatGapExtendPenalty = true;
-	    repeatGapExtendPenalty = atof(optarg);
-	    break;
- 
-          case 'e':
-            gapExtendPenalty = atof(optarg);
-            break;
-
-          case 'b':
-            bandwidth = atoi(optarg);
-            break;
-
-          case 'p':
-            print_alignment = true;
-            break;
- 
-          case 'h':
-            printSummary();
-            exit(0);
-            break;
- 
-          case '?':
-            /* getopt_long already printed an error message. */
-            printSummary();
-            exit(1);
-            break;
- 
-          default:
-            abort ();
-          }
-      }
-
-    /* Print any remaining command line arguments (not options). */
-    if (optind == argc - 2) {
-        //cerr << "fasta file: " << argv[optind] << endl;
-        reference = string(argv[optind]);
-        ++optind;
-        query = string(argv[optind]);
-    } else {
-        cerr << "please specify a reference and query sequence" << endl
-             << "execute " << argv[0] << " --help for command-line usage" << endl;
-        exit(1);
-    }
-
-	// initialize
-	
-	unsigned int referencePos;
-	string cigar;
-
-	// create a new Smith-Waterman alignment object
-    if (bandwidth > 0) {
-        pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> > hr;
-        hr.first.first   = 2;
-        hr.first.second  = 18;
-        hr.second.first  = 1;
-        hr.second.second = 17;
-        CBandedSmithWaterman bsw(matchScore, mismatchScore, gapOpenPenalty, gapExtendPenalty, bandwidth);
-        bsw.Align(referencePos, cigar, reference, query, hr);
-    } else {
-        CSmithWatermanGotoh sw(matchScore, mismatchScore, gapOpenPenalty, gapExtendPenalty);
-	if (useRepeatGapExtendPenalty)
-	    sw.EnableRepeatGapExtensionPenalty(repeatGapExtendPenalty);
-	if (entropyGapOpenPenalty > 0)
-	    sw.EnableEntropyGapPenalty(entropyGapOpenPenalty);
-        sw.Align(referencePos, cigar, reference, query);
-    }
-
-    printf("%s %3u\n", cigar.c_str(), referencePos);
-
-    // optionally print out the alignment
-    if (print_alignment) {
-        int alignmentLength = 0;
-        int len;
-        string slen;
-        vector<pair<int, char> > cigarData;
-        for (string::iterator c = cigar.begin(); c != cigar.end(); ++c) {
-            switch (*c) {
-                case 'I':
-                    len = atoi(slen.c_str());
-                    slen.clear();
-                    cigarData.push_back(make_pair(len, *c));
-                    break;
-                case 'D':
-                    len = atoi(slen.c_str());
-                    alignmentLength += len;
-                    slen.clear();
-                    cigarData.push_back(make_pair(len, *c));
-                    break;
-                case 'M':
-                    len = atoi(slen.c_str());
-                    alignmentLength += len;
-                    slen.clear();
-                    cigarData.push_back(make_pair(len, *c));
-                    break;
-                case 'S':
-                    len = atoi(slen.c_str());
-                    slen.clear();
-                    cigarData.push_back(make_pair(len, *c));
-                    break;
-                default:
-                    len = 0;
-                    slen += *c;
-                    break;
-            }
-        }
-
-        string gapped_ref = string(reference).substr(referencePos, alignmentLength);
-        string gapped_query = string(query);
-
-        int refpos = 0;
-        int readpos = 0;
-        for (vector<pair<int, char> >::iterator c = cigarData.begin(); c != cigarData.end(); ++c) {
-            int len = c->first;
-            switch (c->second) {
-                case 'I':
-                    gapped_ref.insert(refpos, string(len, '-'));
-                    readpos += len;
-		    refpos += len;
-                    break;
-                case 'D':
-                    gapped_query.insert(readpos, string(len, '-'));
-                    refpos += len;
-		    readpos += len;
-                    break;
-                case 'M':
-		    readpos += len;
-		    refpos += len;
-		    break;
-                case 'S':
-                    readpos += len;
-		    gapped_ref.insert(refpos, string(len, '*'));
-		    refpos += len;
-                    break;
-                default:
-                    break;
-            }
-        }
-
-        cout << gapped_ref << endl << gapped_query << endl;
-    }
-
-	return 0;
-
-}



View it on GitLab: https://salsa.debian.org/med-team/tvc/compare/afcf9f4181f26045d9cabe41d6a48b148641cf9f...f5165aac845c0cc9f6d386b02d06ff3a3cb49119

-- 
View it on GitLab: https://salsa.debian.org/med-team/tvc/compare/afcf9f4181f26045d9cabe41d6a48b148641cf9f...f5165aac845c0cc9f6d386b02d06ff3a3cb49119
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180927/b5ac569b/attachment-0001.html>