[med-svn] [dindel] 01/02: Imported Upstream version 1.01+dfsg

Steffen Möller moeller at moszumanska.debian.org
Fri Mar 18 15:37:00 UTC 2016


This is an automated email from the git hooks/post-receive script.

moeller pushed a commit to branch master
in repository dindel.

commit 6328b6425902329c9fc70bc71092e9b83a07ab51
Author: Steffen Möller <moeller at debian.org>
Date:   Fri Mar 18 16:32:43 2016 +0100

    Imported Upstream version 1.01+dfsg
---
 DInDel.cpp                | 4309 +++++++++++++++++++++++++++++++++++++++++++++
 DInDel.hpp                |  397 +++++
 Fasta.hpp                 |   72 +
 Faster.cpp                |  785 +++++++++
 Faster.hpp                |  101 ++
 GetCandidates.cpp         |  498 ++++++
 GetCandidates.hpp         |  107 ++
 HapBlock.cpp              |  204 +++
 HapBlock.hpp              |   57 +
 Haplotype.hpp             |  389 ++++
 HaplotypeDistribution.cpp |  486 +++++
 HaplotypeDistribution.hpp |  498 ++++++
 Library.hpp               |  258 +++
 MLAlignment.hpp           |   78 +
 Makefile                  |   15 +
 MyBam.hpp                 |   98 ++
 ObservationModel.hpp      |  103 ++
 ObservationModelFB.cpp    | 1829 +++++++++++++++++++
 ObservationModelFB.hpp    |  169 ++
 ObservationModelSeqAn.hpp |  377 ++++
 OutputData.hpp            |  121 ++
 README                    |   15 +
 Read.hpp                  |  451 +++++
 ReadIndelErrorModel.hpp   |   57 +
 StringHash.hpp            |   40 +
 Utils.hpp                 |   51 +
 Variant.hpp               |  179 ++
 VariantFile.hpp           |  304 ++++
 digamma.hpp               |  450 +++++
 foreach.hpp               |  812 +++++++++
 30 files changed, 13310 insertions(+)

diff --git a/DInDel.cpp b/DInDel.cpp
new file mode 100644
index 0000000..a292606
--- /dev/null
+++ b/DInDel.cpp
@@ -0,0 +1,4309 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <stdlib.h>
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <set>
+#include <algorithm>
+#include <boost/program_options.hpp>
+#include <boost/math/special_functions/digamma.hpp>
+#include <seqan/align.h>
+#include <seqan/graph_align.h>
+#include "foreach.hpp"
+#include "bam.h"
+#include "DInDel.hpp"
+#include "Haplotype.hpp"
+#include "HaplotypeDistribution.hpp"
+#include "ObservationModelFB.hpp"
+#include "Utils.hpp"
+#include "faidx.h"
+#include "GetCandidates.hpp"
+#include "ObservationModelSeqAn.hpp"
+#include "VariantFile.hpp"
+#include "Faster.hpp"
+#include <ext/hash_map>
+#include <exception>
+
+const int USECALLWINDOW=0;
+//using namespace seqan;
+using namespace seqan;
+namespace po = boost::program_options;
+
+using namespace std;
+//using namespace fasta;
+
+
+DetInDel::DetInDel(const string & bfName, const Parameters & _params, int multipleFiles) : params(_params)
+
+{
+	fai=NULL;
+	if (params.alignAgainstReference) {
+		fai = fai_load(params.refFileName.c_str());
+		if (!fai) {
+			cerr << "Cannot open reference sequence file " << params.refFileName << endl;
+			exit(1);
+		}
+	}
+
+	if (multipleFiles==0) {
+		myBams.push_back(new MyBam(bfName));
+	} else {
+
+		ifstream file(bfName.c_str());
+		if (!file.is_open()) {
+			   cerr << "Cannot open file with BAM files: " << bfName << endl;
+			   throw string("File open error.");
+		}
+		string line;
+		while (getline(file, line)) {
+			if (!line.empty()) {
+				istringstream is(line);
+				string fname;
+				is >> fname;
+				if (!fname.empty()) {
+					cout << "Reading BAM file " << fname << endl;
+					myBams.push_back(new MyBam(fname));
+					myBamsFileNames.push_back(fname);
+				}
+			}
+		}
+		file.close();
+	}
+}
+
+DetInDel::~DetInDel()
+{
+	if (params.alignAgainstReference && fai) {
+		fai_destroy(fai);
+	}
+	for (size_t b=0;b<myBams.size();b++) delete myBams[b];
+}
+
+void DetInDel::analyzeDifference(const pair<Haplotype, Haplotype> & hp1, const vector<Read> & reads,  uint32_t leftPos, uint32_t rightPos)
+{
+	cout << "Inference results" << endl;
+
+	if (params.analyzeLowFreqDiffThreshold<-100.0) {
+		size_t offset=50;
+		cout << "Haplotype pair: " << endl;
+		cout << hp1.first << endl << hp1.second << endl;
+
+
+		cout << "h.1 alignment: " << endl;
+		cout << string(offset,' ') << hp1.first.seq << endl;
+		for (size_t r=0;r<reads.size();r++) {
+			ObservationModelFBMax om(hp1.first, reads[r], leftPos,params.obsParams);
+			MLAlignment ml=om.calcLikelihood();
+
+			double lm=ml.ll;
+			ObservationModelFBMax op(hp1.second, reads[r], leftPos, params.obsParams);
+			MLAlignment ml2=op.calcLikelihood();
+			double lp=ml2.ll;
+
+			if (lm>=lp) om.printAlignment(offset);
+		}
+
+		cout << "h.2 alignment: " << endl;
+		cout << string(offset,' ') << hp1.second.seq << endl;
+		for (size_t r=0;r<reads.size();r++) {
+			ObservationModelFBMax om(hp1.first, reads[r], leftPos,params.obsParams);
+			MLAlignment ml=om.calcLikelihood();
+
+			double lm=ml.ll;
+			ObservationModelFBMax op(hp1.second, reads[r], leftPos, params.obsParams);
+			MLAlignment ml2=op.calcLikelihood();
+			double lp=ml2.ll;
+
+			if (lp>=lm) op.printAlignment(offset);
+
+
+		}
+
+
+
+	} else {
+
+		double ll=0.0,l1=0.0, l2=0.0;
+		vector<size_t > show;
+		for (size_t r=0;r<reads.size();r++) {
+			ObservationModelFBMax om(hp1.first, reads[r], leftPos,params.obsParams);
+			MLAlignment ml=om.calcLikelihood();
+
+			double lm=ml.ll;
+			ObservationModelFBMax op(hp1.second, reads[r], leftPos, params.obsParams);
+			MLAlignment ml2=op.calcLikelihood();
+			double lp=ml2.ll;
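+			// dll = log( 0.5*exp(lm) + 0.5*exp(lp) ): likelihood of the read under an equal
+			// mixture of the two haplotypes (addLogs(lm,lp)+log(.5) is the numerically safer equivalent)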
+			double dll=log(exp(lp)+exp(lm))+log(.5);
+			l1+=(addLogs(lm,lm)+log(.5));
+			l2+=(addLogs(lp,lp)+log(.5));
+			ll+=dll;
+
+			if (lp-lm>params.analyzeLowFreqDiffThreshold) {
+				show.push_back(r);
+			}
+	//		cout << "read[" << r <<"]: 1-mq: " << 1.0-reads[r].mapQual << " first hap lik: " << lm << " second hap lik: " << lp << " combined: " << dll << " lp+lm: " << ll << " lm+lm: " << l1 << " lp+lp: " << l2 << endl;
+
+		}
+
+		size_t offset=50;
+		if (show.size()) {
+			cout << "Haplotype pair: " << endl;
+			cout << hp1.first << endl << hp1.second << endl;
+
+
+			cout << "h.1 alignment: " << endl;
+			cout << string(offset,' ') << hp1.first.seq << endl;
+
+			for (size_t i=0;i<show.size();i++) {
+				Read rr=reads[show[i]];
+				ObservationModelFBMax om2(hp1.first, rr, leftPos, params.obsParams);
+				om2.calcLikelihood();
+
+		//		om2.computeMarginals();
+				om2.printAlignment(offset);
+			}
+
+			cout << endl << endl;
+			cout << "h.2 alignment: " << endl;
+			cout << string(offset,' ') << hp1.second.seq << endl;
+
+			for (size_t i=0;i<show.size();i++) {
+				Read rr=reads[show[i]];
+				ObservationModelFBMax om2(hp1.second, rr, leftPos, params.obsParams);
+				om2.calcLikelihood();
+		//		om2.computeMarginals();
+				om2.printAlignment(offset);
+			}
+		}
+		else { cout << "No differences in log-likelihoods over threshold." << endl; };
+	}
+
+}
+
+void DetInDel::showAlignments(const pair<Haplotype, Haplotype> & hp1, const vector<Read> & reads,  uint32_t leftPos, uint32_t rightPos)
+{
+	cout << "Inference results" << endl;
+
+	double ll=0.0;
+	int offset=50;
+	vector <double> lf(reads.size(),0.0), ls(reads.size(),0);
+	cout << "h.1 alignment: " << endl;
+	cout << string(offset,' ') << hp1.first.seq << endl;
+	for (size_t r=0;r<reads.size();r++) {
+		ObservationModelFBMax om(hp1.first, reads[r], leftPos,params.obsParams);
+		double lm=om.getLogLikelihood();
+		lf[r]=lm;
+		if (lm<params.analyzeLowFreqDiffThreshold) {
+			om.printAlignment(offset);
+		}
+	}
+
+	cout << "h.2 alignment: " << endl;
+	cout << string(offset,' ') << hp1.second.seq << endl;
+	for (size_t r=0;r<reads.size();r++) {
+		ObservationModelFBMax om(hp1.second, reads[r], leftPos,params.obsParams);
+		double lm=om.getLogLikelihood();
+		ls[r]=lm;
+		if (lm<params.analyzeLowFreqDiffThreshold) {
+			om.printAlignment(offset);
+		}
+		ll+=addLogs(lf[r],ls[r])+log(.5);
+	}
+	cout << "Total loglikelihood: " << ll << endl;
+
+
+}
+
+void DetInDel::showAlignmentsPerHaplotype(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, uint32_t candPos, uint32_t leftPos)
+{
+	cout << "ALIGNMENTS" << endl;
+
+	vector<std::set<size_t> > maxHap(haps.size());
+	for (size_t r=0;r<reads.size();r++) {
+		size_t idx=0;
+		double ml=-HUGE_VAL;
+		for (size_t h=0;h<haps.size();h++) {
+			if (liks[h][r].ll>ml) {
+				ml=liks[h][r].ll;
+				idx=h;
+			}
+		}
+		maxHap[idx].insert(r);
+	}
+
+	int offset=50;
+	for (size_t h=0;h<haps.size();h++) {
+		cout << "*******************************************" << endl;
+		cout << endl << "HAPLOTYPE " << h << endl << endl;
+		cout << string(offset,' ') << haps[h].seq << endl;
+		BOOST_FOREACH(size_t r, maxHap[h]) {
+			ObservationModelFBMax om(haps[h], reads[r], leftPos,params.obsParams);
+			om.calcLikelihood();
+			om.printAlignment(offset);
+		}
+	}
+
+
+}
+
+
+
+
+string DetInDel::getRefSeq(uint32_t lpos, uint32_t rpos)
+{
+	if (!fai) throw string("FAI error.");
+
+	char *str;
+	char *ref;
+
+	str = (char*)calloc(strlen(params.tid.c_str()) + 30, 1);
+	sprintf(str, "%s:%d-%d", params.tid.c_str(), lpos, rpos);
+	int len;
+	ref = fai_fetch(fai, str, &len);
+	free(str);
+	if (ref==NULL || len==0) throw string("faidx error: could not fetch reference sequence");
+	string res(ref);
+	free(ref);
+
+	transform(res.begin(), res.end(), res.begin(), ::toupper);
+	return res;
+}
+
+
+double DetInDel::getMaxHap(Haplotype & h1, Haplotype &h2, HapPairLik & hpl, const vector<Haplotype> & haps, vector<HapPairLik> & likPairs)
+{
+
+	size_t idx=0, midx=0;
+	double maxll=-HUGE_VAL;
+	for (idx=0;idx<likPairs.size();idx++) {
+			double ll=likPairs[idx].ll;
+			if (ll>maxll) {
+				maxll=ll;
+				midx=idx;
+			}
+	}
+	h1=haps[likPairs[midx].h1];
+	h2=haps[likPairs[midx].h2];
+
+	/*
+	cout << "getMaxHap: " << midx <<  " h1: " << likPairs[midx].h1 << " h2: " << likPairs[midx].h2 << endl;
+	cout << "indelcoverage h1: ";
+	for (map<int, VariantCoverage>::const_iterator it=likPairs[midx].hapIndelCoverage1.begin();it!=likPairs[midx].hapIndelCoverage1.end();it++) {
+		cout << "[" << it->second.nf << "," << it->second.nr << "]";
+	}
+	cout << endl;
+	cout << "indelcoverage h2: ";
+	for (map<int, VariantCoverage>::const_iterator it=likPairs[midx].hapIndelCoverage2.begin();it!=likPairs[midx].hapIndelCoverage2.end();it++) {
+		cout << "[" << it->second.nf << "," << it->second.nr << "]";
+	}
+	cout << endl;
+	*/
+
+	hpl=likPairs[midx];
+	return maxll;
+}
+
+void DetInDel::outputMaxHap(ostream *output, const string & prefix, const vector<Haplotype> & haps, vector<HapPairLik> & likPairs)
+{
+
+	Haplotype h1, h2;
+	HapPairLik hpl;
+	getMaxHap(h1,h2, hpl, haps, likPairs);
+	*output << prefix << " " << hpl.ll << " " << hpl.numFirst << " " << hpl.numSecond << " " << hpl.numIndFirst << " " << hpl.numIndSecond << " " << hpl.numOffBoth << " " << h1.seq << " " << h2.seq << " ";
+	for (map<int, AlignedVariant>::const_iterator it=h1.indels.begin();it!=h1.indels.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+	*output << "!";
+	for (map<int, AlignedVariant>::const_iterator it=h2.indels.begin();it!=h2.indels.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+	*output << "!";
+	for (map<int, AlignedVariant>::const_iterator it=h1.snps.begin();it!=h1.snps.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+	*output << "!";
+	for (map<int, AlignedVariant>::const_iterator it=h2.snps.begin();it!=h2.snps.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+	*output << endl;
+
+
+}
+
+void DetInDel::outputTopHaps(ostream *output, const string & prefix, const vector<Haplotype> & haps, vector<HapPairLik> & likPairs, int n)
+{
+	// output n most likely haplotype pairs
+
+	for (int ns=0;ns<n && ns<int(likPairs.size());ns++) {
+		const Haplotype & h1 = haps[likPairs[ns].h1];
+		const Haplotype & h2 = haps[likPairs[ns].h2];
+		const HapPairLik & hpl = likPairs[ns];
+		*output << prefix << " " << ns+1 << " " << hpl.ll << " " << hpl.numFirst << " " << hpl.numSecond << " " << hpl.numIndFirst << " " << hpl.numIndSecond << " " << hpl.numOffBoth << " " << h1.seq << " " << h2.seq << " ";
+		for (map<int, AlignedVariant>::const_iterator it=h1.indels.begin();it!=h1.indels.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+		*output << "!";
+		for (map<int, AlignedVariant>::const_iterator it=h2.indels.begin();it!=h2.indels.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+		*output << "!";
+		for (map<int, AlignedVariant>::const_iterator it=h1.snps.begin();it!=h1.snps.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+		*output << "!";
+		for (map<int, AlignedVariant>::const_iterator it=h2.snps.begin();it!=h2.snps.end();it++) if (it->second.getString()!="*REF") *output << "[" << it->first << "," << it->second.getStartRead() << "," << it->second.getString() << "]";
+		*output << endl;
+	}
+
+}
+
+void DetInDel::outputHapsAndFreqs(ostream *output, const string & prefix, const vector<Haplotype> & haps, const vector<double> & freqs, uint32_t leftPos)
+{
+	// output all haplotypes with their estimated frequencies and indels
+
+	for (size_t h=0;h<haps.size();h++) {
+		const Haplotype & h1 = haps[h];
+		*output << prefix << " " << h+1 << " " << freqs[h] << " ";
+		for (map<int, AlignedVariant>::const_iterator it=h1.indels.begin();it!=h1.indels.end();it++) if (it->second.getString()!="*REF") *output << leftPos+it->first << "," << it->second.getString() << "|";
+		//*output << "!";
+		//for (map<int, AlignedVariant>::const_iterator it=h1.snps.begin();it!=h1.snps.end();it++) if (it->second.getString()!="*REF") *output << leftPos+it->first << "," << it->second.getString() << "|";
+		*output << endl;
+	}
+
+}
+
+
+
+void DetInDel::empiricalDistributionMethod(int index, const vector<Read> & reads, uint32_t pos, uint32_t leftPos, uint32_t rightPos, const AlignedCandidates & candidateVariants,  OutputData & oData, OutputData & glfData)
+{
+	vector<Haplotype> haps;
+
+	vector<vector<MLAlignment> > liks;
+	vector<HapPairLik> likPairs;
+
+
+
+	// get the haplotypes
+	// changes leftPos and rightPos to haplotype blocks in HDIterator
+
+	// NOTE leftPos will be the left position of the reference sequence each haplotype will be aligned to
+	bool skip=getHaplotypes(haps, reads, pos, leftPos, rightPos, candidateVariants);
+
+	if (int(reads.size()*haps.size())>params.maxHapReadProd) {
+		stringstream os;
+		os << "skipped_numhap_times_numread>" << params.maxHapReadProd;
+		throw os.str();
+	}
+
+	int refSeqPos=leftPos;
+
+	if (skip) {
+		cerr << "tid: " << params.tid << " pos:  " << pos << " SKIPPING!" << endl;
+	}
+	else {
+
+		if (!params.quiet) cout << "[empiricalDistributionMethod] Number of haplotypes: " << haps.size() << endl;
+		// compute likelihood of every read given every haplotype
+
+		if (params.estimateHapFreqs) {
+			vector<double> hapFreqs;
+			map <int, vector<tuple<AlignedVariant, double, double> > > posteriors;
+			vector<HapEstResult> her;
+
+			OutputData::Line prefilledLine(oData);
+			prefilledLine.set("index", index);
+			prefilledLine.set("tid", params.tid);
+			prefilledLine.set("center_position",pos);
+			prefilledLine.set("num_reads", reads.size());
+			prefilledLine.set("msg","ok");
+			// string rseq = getRefSeq(leftPos+1, rightPos+1);
+			prefilledLine.set("lpos",leftPos);
+			prefilledLine.set("rpos",rightPos);
+			// prefilledLine.set("refseq", rseq);
+
+
+			vector<int> onHap(reads.size(),1); // which reads were mapped inside the haplotype window given an artificially high mapping quality
+
+			if (params.slower) {
+				computeLikelihoods(haps, reads, liks, leftPos, rightPos, onHap);
+			} else {
+				computeLikelihoodsFaster(haps, reads, liks, leftPos, rightPos, onHap);
+				// int nrOffAll=0; // number of reads mapped outside all haplotypes
+				// for (size_t x=0;x<onHap.size();x++) if (!onHap[x]) nrOffAll++;
+			}
+
+
+			int numReadOffAllHaps=0;
+			int numHQReads=0;
+			for (size_t r=0;r<reads.size();r++) if (reads[r].mapQual > (1.0-1e-6) ) {
+				numHQReads++;
+				int offall=1;
+				for (size_t h=0;h<haps.size();h++) if (!liks[h][r].offHap) offall=0;
+				if (offall) numReadOffAllHaps++;
+			}
+			prefilledLine.set("num_off_hap", numReadOffAllHaps);
+ 			prefilledLine.set("num_hqreads", numHQReads);
+
+
+			//estimateHaplotypeFrequenciesPosterior(haps, reads,liks, hapFreqs, posteriors, pos, leftPos, glfOutput);
+			/*
+			estimateHaplotypeFrequenciesBayesEM(haps, reads,liks, hapFreqs, her, pos, leftPos, glfOutput, "all");
+			BOOST_FOREACH(HapEstResult hr, her) {
+				//cout << "EMA " << params.tid << " "  << pos << " " << leftPos+hr.pos << " " <<reads.size() << " " << hr.av.getString() << " " << hr.prob << " " << hr.freq << " " << hr.freq*double(reads.size()) << " " << hr.nrf << " " << hr.nrr << endl;
+				OutputData::Line EMALine = prefilledLine;
+				EMALine.set("analysis_type", "EMA");
+				EMALine.set("realigned_position", leftPos+hr.pos);
+				EMALine.set("first_called_all", hr.av.getString());
+				EMALine.set("post_prob_variant", hr.prob);
+				EMALine.set("est_freq", hr.freq);
+				EMALine.set("first_var_cover_forward", hr.nrf);
+				EMALine.set("first_var_cover_reverse", hr.nrr);
+				oData.output(EMALine);
+			}
+			*/
+
+			hapFreqs.clear();
+			her.clear();
+			estimateHaplotypeFrequenciesBayesEM(haps, reads,liks, hapFreqs, her, pos, leftPos, rightPos, glfData, index, candidateVariants, params.bayesType);
+
+			for (size_t x=0;x<her.size();x++) {
+				HapEstResult & hr = her[x];
+				//cout << "EMSV " << params.tid << " "  << pos << " " << leftPos+hr.pos << " " <<reads.size() << " " << hr.av.getString() << " " << hr.prob << " " << hr.freq << " " << hr.freq*double(reads.size()) << " " << hr.nrf << " " << hr.nrr << endl;
+
+				int var_in_window=0;
+				const AlignedVariant & avar = hr.av;
+				const AlignedVariant *av = candidateVariants.findVariant(hr.pos+leftPos, avar.getType(), avar.getString());
+				if (av!=NULL) {
+					var_in_window=1;
+				}
+
+				OutputData::Line EMSVLine = prefilledLine;
+				EMSVLine.set("analysis_type", params.bayesType);
+				EMSVLine.set("realigned_position", leftPos+hr.pos);
+				EMSVLine.set("first_called_all", hr.av.getString());
+				EMSVLine.set("post_prob_variant", hr.prob);
+				EMSVLine.set("est_freq", hr.freq);
+				EMSVLine.set("first_var_cover_forward", hr.nrf);
+				EMSVLine.set("first_var_cover_reverse", hr.nrr);
+				EMSVLine.set("was_candidate_in_window",var_in_window);
+			//	oData.output(EMSVLine);
+			}
+
+
+
+
+			if (params.outputRealignedBAM && params.slower) {
+				stringstream os;
+				os << index << "_" << params.tid << "_" << leftPos+params.minReadOverlap << "_" << rightPos-params.minReadOverlap  << ".bam";
+
+				vector < CIGAR > cigars(reads.size());
+				for (size_t r=0;r<reads.size();r++) {
+					if (onHap[r]) {
+						double llmax=-HUGE_VAL;
+						int hidx=0;
+						for (size_t h=0;h<haps.size();h++) if (liks[h][r].ll>llmax) {
+							llmax=liks[h][r].ll;
+							hidx=h;
+						}
+						cigars[r]=getCIGAR(haps[hidx], reads[r], liks[hidx][r], refSeqPos);
+					}
+				}
+				int leftOk = leftPos + params.minReadOverlap;
+				int rightOk = rightPos - params.minReadOverlap;
+	
+				string newBAMFileName=params.fileName;
+				newBAMFileName.append(".ra.").append(os.str());
+				writeRealignedBAMFile(newBAMFileName, cigars, reads, onHap, myBams[0]->bh);
+
+				if (params.processRealignedBAM != "no")  {
+					stringstream cmd;
+					cmd << params.processRealignedBAM << " " << newBAMFileName << " " << params.fileName << "_realigned" << " " << params.tid << " " << leftOk << " " << rightOk;
+					cout << "Executing: " << cmd.str() << endl;
+					system(cmd.str().c_str());
+				}
+
+
+				/*
+				newBAMFileName=params.fileName;
+				newBAMFileName.append(".ua.").append(os.str());
+				writeUnalignedBAMFile(newBAMFileName, reads, onHap, myBams[0].bh);
+				*/
+			}
+
+		}
+		if (params.showHapAlignments) {
+			showAlignmentsPerHaplotype(haps, reads, liks, pos, leftPos);
+		}
+
+		if (params.doDiploid) {
+		//	cout << "A" << endl;
+			vector<double> hapFreqs;
+			map <int, vector<tuple<AlignedVariant, double, double> > > posteriors;
+			vector<HapEstResult> her;
+
+			vector<int> onHap(reads.size(),1); // which reads were mapped inside the haplotype window given an artificially high mapping quality
+
+			if (params.slower) {
+				computeLikelihoods(haps, reads, liks, leftPos, rightPos, onHap);
+			} else {
+				computeLikelihoodsFaster(haps, reads, liks, leftPos, rightPos, onHap);
+			}
+
+			diploidGLF(haps, reads, liks, hapFreqs, her, pos, leftPos, rightPos, glfData, index,  candidateVariants,"dip");
+
+
+			//statisticsHaplotypePair(haps,reads,liks, hpl, prefilledLine);
+
+			/*
+			string prefix("FMAP");
+
+
+			callIndel(haps, reads, liks, likPairs, pos, leftPos, rightPos, prefix, prefilledLine, oData);
+			callSNP(haps, reads, liks, likPairs, pos, leftPos, rightPos, prefix, prefilledLine, oData);
+			*/
+			/*
+			if (!(nci==0 && ncs==0 && params.printCallsOnly)) {
+				stringstream os; os << " " << params.tid << " " << pos << " " << leftPos; prefix.append(os.str());
+				outputTopHaps(&output, prefix, haps, likPairs, params.numOutputTopHap);
+			}
+			*/
+
+			/*
+			if (params.analyzeLowFreq) {
+				pair<Haplotype, Haplotype> oh;
+				oh.first=h2;
+				oh.second=h1;
+				analyzeDifference(oh, reads, leftPos, rightPos);
+
+				oh.first=h1;
+				oh.second=h2;
+				analyzeDifference(oh, reads, leftPos, rightPos);
+				//oh.first.printHaps();
+				//oh.second.printHaps();
+			}
+			*/
+
+			if (params.outputRealignedBAM && params.slower) {
+				// computes pair likelihoods using priors
+				computePairLikelihoods(haps, reads, liks, likPairs, true,candidateVariants, leftPos);
+				Haplotype h1, h2; HapPairLik hpl;
+				getMaxHap(h1, h2, hpl, haps, likPairs);
+
+				vector < CIGAR > cigars(reads.size());
+				for (size_t r=0;r<reads.size();r++) {
+						int hmax = hpl.h1;
+						if (fabs(liks[hpl.h1][r].ll-liks[hpl.h2][r].ll)<1e-8) {
+							if (haps[hpl.h1].countIndels()<haps[hpl.h2].countIndels()) hmax = hpl.h1; else hmax = hpl.h2;
+						} else {
+							if (liks[hpl.h1][r].ll>liks[hpl.h2][r].ll) {
+								hmax = hpl.h1;
+							} else {
+								hmax = hpl.h2;
+							}
+						}
+						const Haplotype & hx = haps[hmax];
+						const Read & rd = reads[r];
+						const MLAlignment & ml = liks[hmax][r];
+						cigars[r]=getCIGAR(haps[hmax], reads[r], liks[hmax][r], refSeqPos);
+				}
+
+				stringstream os;
+				int leftOk = leftPos + params.minReadOverlap;
+				int rightOk = rightPos - params.minReadOverlap;
+				os << index << "_" << params.tid << "_" << leftPos+params.minReadOverlap << "_" << rightPos-params.minReadOverlap  << ".bam";
+				string newBAMFileName=params.fileName;
+				newBAMFileName.append(".ra.").append(os.str());
+				writeRealignedBAMFile(newBAMFileName, cigars, reads, onHap, myBams[0]->bh);
+				/*
+				newBAMFileName=params.fileName;
+				newBAMFileName.append(".ua.").append(os.str());
+				writeUnalignedBAMFile(newBAMFileName, reads, onHap, myBams[0]->bh);
+				*/
+				if (params.processRealignedBAM != "no")  {
+					stringstream cmd;
+					cmd << params.processRealignedBAM << " " << newBAMFileName << " " << params.fileName << "_realigned" << " " << params.tid << " " << leftOk << " " << rightOk;
+					cout << "Executing: " << cmd.str() << endl;
+					system(cmd.str().c_str());
+				}
+
+
+			}
+
+
+		}
+//		glf.writeToFile(string(""), output);
+	}
+
+}
+
+
+
+void DetInDel::writeUnalignedBAMFile(const string & fileName, const vector<Read> & reads, const vector<int> & onHap, const bam_header_t *bh=NULL)
+{
+
+	if (onHap.size()!=reads.size()) return;
+	bool hasUnaligned=false;
+	for (size_t x=0;x<onHap.size();x++) if (!onHap[x]) {
+		hasUnaligned=true;
+		break;
+	}
+	if (!hasUnaligned) return;
+
+	bamFile bf = bam_open(fileName.c_str(),"wb");
+	if (bf==NULL) throw string("Cannot open bamfile ").append(fileName).append(" for writing!");
+
+	if (bh!=NULL) {
+		bam_header_write(bf, bh);
+	}
+
+	for (size_t r=0;r<reads.size();r++) if (!onHap[r]) {
+		bam1_t *b=reads[r].bam;
+		if (bam_write1(bf,b)<=0) throw string("Error writing unaligned read to BAM file.");
+	}
+
+	bam_close(bf);
+}
+
+void DetInDel::writeRealignedBAMFile(const string & fileName, const vector<CIGAR> & cigars, const vector<Read> & reads, const vector<int> & onHap, const bam_header_t *bh=NULL)
+{
+		if (cigars.size()!=reads.size()) throw string("Problem with the cigars.");
+
+		bamFile bf = bam_open(fileName.c_str(),"wb");
+		if (bf==NULL) throw string("Cannot open bamfile ").append(fileName).append(" for writing!");
+
+		if (bh!=NULL) {
+				bam_header_write(bf, bh);
+		}
+
+		for (size_t r=0;r<reads.size();r++) {
+
+				bam1_t *b=reads[r].bam;
+
+				if (onHap[r]) {
+						bam1_t *nb=bam_init1();
+
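+						// each CIGAR operation occupies one uint32_t in the bam1_t data block,
+						// so replacing the CIGAR changes the record length by 4*(new_ncig - old_ncig) bytes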
+						uint32_t old_ncig=b->core.n_cigar;
+						uint32_t new_ncig=cigars[r].size();
+						int old_data_len=b->data_len;
+						int new_data_len=old_data_len - old_ncig*4 + new_ncig*4;
+
+						*nb=*b;
+						nb->data = (uint8_t*)calloc(new_data_len, 1);
+						nb->data_len=new_data_len;
+						nb->m_data=nb->data_len;
+						// copy cigar
+						for (uint32_t n=0;n<new_ncig;n++) {
+								bam1_cigar(nb)[n]=(  (((uint32_t) cigars[r][n].second) << BAM_CIGAR_SHIFT) | ( ( (uint32_t) cigars[r][n].first ) )  );
+						}
+						nb->core.n_cigar=(unsigned int) new_ncig;
+
+						for (size_t n=0;n<b->core.l_qname;n++) {
+								nb->data[n]=b->data[n];
+						}
+
+						int y=b->core.l_qname+4*new_ncig;
+						for (int n=b->core.l_qname+4*old_ncig;n<b->data_len;n++,y++) {
+								nb->data[y]=b->data[n];
+						}
+
+						// update position of read
+
+						nb->core.pos=cigars[r].refPos;
+						// update insert size if mapped
+						nb->core.isize=cigars[r].refPos-nb->core.mpos;
+						if (bam_write1(bf,nb)<=0) throw string("Error writing alignment to realigned BAM file.");
+						bam_destroy1(nb);
+				} else {
+						if (bam_write1(bf,b)<=0) throw string("Error writing alignment to realigned BAM file.");
+				}
+		}
+
+		bam_close(bf);
+}
+
+
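+// getCIGAR: combines the read-to-haplotype alignment (ml) with the haplotype-to-reference
+// alignment (hap.ml) to produce a CIGAR relative to the reference; read bases that cannot
+// be anchored on the reference are soft-clipped.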
+DetInDel::CIGAR DetInDel::getCIGAR(const Haplotype & hap, const Read & read, const MLAlignment & ml, int refSeqStart)
+{
+	if (hap.ml.hpos.size()!=hap.size()) throw string("Haplotype has not been aligned!");
+	if (ml.hpos.size()!=read.size()) throw string("Read is not properly aligned!");
+	const MLAlignment & hml=hap.ml; // alignment of haplotype to reference
+
+	//string qname = bam1_qname(read.getBam());
+	const int debug = 0;
+	/*
+	if (qname == "IL8_4337:8:102:11530:1494") {
+		cout << "YES" << endl;
+		cout << qname << endl;
+		debug = 1;
+	}
+	*/
+
+	vector<int> npos(read.size()); // npos records position of read base on the reference sequence
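+	// (composition of the two alignments: read base -> haplotype position via ml.hpos,
+	//  haplotype position -> reference position via hml.hpos)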
+	for (int b=0;b<int(read.size());b++) {
+		if (ml.hpos[b]>=0) npos[b]=hml.hpos[ml.hpos[b]]; else npos[b]=ml.hpos[b];
+	}
+
+	if (debug) {
+		for (size_t h=0;h<npos.size();h++) {
+			cout << "[" << h << "," << npos[h] << "]";
+		}
+		cout << endl;
+		cout << endl;
+
+		for (size_t h=0;h<ml.hpos.size();h++) {
+			cout << "[" << h << "," << ml.hpos[h] << "]";
+		}
+		cout << endl;
+		cout << endl;
+		for (size_t h=0;h<hml.hpos.size();h++) {
+				cout << "[" << h << "," << hml.hpos[h] << "]";
+			}
+			cout << endl;
+	}
+	CIGAR cig;
+
+	int b=0, prevponr=0; // prevponr: position on the reference of the previous read base aligned to the reference (i.e. not part of a deletion/insertion/LO/RO)
+
+	// determine last base in read aligned to the haplotype
+	b=read.size()-1;
+	while (npos[b]<0) b--;
+	int lastbonh=b;
+
+	if (lastbonh<0) {
+		// clip the whole read, read is de facto off haplotype
+		cig.push_back(CIGAR::CIGOp(BAM_CSOFT_CLIP, read.size()));
+		return cig;
+	}
+
+
+	if (debug) {
+		cout << "lastbonh: " << lastbonh << endl;
+	}
+	// find first base in read that is aligned to haplotype and to reference
+	// all sequence before that is considered 'soft clipped', ie not aligned. This may include sequence that matches perfectly to the reference
+	b=0;
+	while (npos[b]<0) b++;
+ 	if (b>0) cig.push_back(CIGAR::CIGOp(BAM_CSOFT_CLIP, b));
+ 	prevponr=npos[b];
+ 	cig.refPos=refSeqStart+prevponr;
+
+ 	int curr_cop=BAM_CMATCH;
+ 	int len_curr_cop=1;
+
+
+
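+ 	// walk along the read and emit CIGAR operations by comparing consecutive reference positions:
+ 	// consecutive positions give M, a jump in reference position gives D, INS markers give I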
+ 	while (b<lastbonh) {
+
+ 		int chp=npos[b]; // position on reference of current base in read
+ 		int nhp=npos[b+1];
+
+ 		if (nhp==MLAlignment::INS) {
+ 			if (chp==MLAlignment::INS) {
+ 				// stay on inserted sequence
+ 				if (curr_cop!=BAM_CINS) throw string("Error(1)!");
+ 				len_curr_cop++;
+ 			} else {
+ 				if (chp>=0) {
+ 					// going from on reference to insertions
+ 					if (curr_cop!=BAM_CMATCH) throw string("Error(2)!");
+ 					// write CIGAR
+					cig.push_back(CIGAR::CIGOp(BAM_CMATCH,len_curr_cop));
+
+					// update current CIGAR operation
+					len_curr_cop=1;
+					curr_cop=BAM_CINS;
+
+					prevponr=chp;
+ 				} else throw string("How is this possible? (1)");
+ 			}
+
+ 		} else if (chp>=0 && nhp>=0 && nhp-chp==1) {
+ 	 	  // MATCH to MATCH
+ 			if (curr_cop!=BAM_CMATCH) {
+ 				cout << "b: " << b << " chp: " << chp << " nhp: " << nhp << endl;
+ 				throw string("Error(3)!");
+ 			}
+ 			len_curr_cop++;
+ 			prevponr=nhp;
+ 		} else if (chp>=0 && nhp>=0 && nhp-chp>1) {
+ 			// deletion
+ 			if (curr_cop!=BAM_CMATCH) throw string("Error(4)!");
+ 			// write CIGAR
+ 			cig.push_back(CIGAR::CIGOp(BAM_CMATCH,len_curr_cop));
+
+ 			// write deletion CIGAR
+ 			cig.push_back(CIGAR::CIGOp(BAM_CDEL,nhp-chp-1));
+
+ 			curr_cop=BAM_CMATCH;
+ 			len_curr_cop=1;
+
+ 			prevponr=nhp;
+ 		} else if (chp==MLAlignment::INS && nhp-prevponr==1) {
+ 			// from inserted bases to bases matched to the reference
+ 			cig.push_back(CIGAR::CIGOp(BAM_CINS,len_curr_cop));
+
+ 			//
+ 			curr_cop=BAM_CMATCH;
+ 			len_curr_cop=1;
+
+ 			prevponr=nhp;
+ 		} else if (chp==MLAlignment::INS && nhp-prevponr>1) {
+ 			// next base is again on reference but more than 1 reference base from the last read base aligned to the haplotype
+ 			cig.push_back(CIGAR::CIGOp(BAM_CINS,len_curr_cop));
+ 			cig.push_back(CIGAR::CIGOp(BAM_CDEL,nhp-prevponr-1));
+
+ 			curr_cop=BAM_CMATCH;
+ 			len_curr_cop=1;
+
+ 			prevponr=nhp;
+ 		}
+ 		b++;
+ 	}
+
+ 	// write last cigar
+ 	cig.push_back(CIGAR::CIGOp(curr_cop,len_curr_cop));
+
+ 	// write soft_clip at the end
+ 	if (read.size()-1 - lastbonh>0) {
+ 		cig.push_back(CIGAR::CIGOp(BAM_CSOFT_CLIP,read.size()-1 - lastbonh));
+ 	}
+
+ 	/*
+ 	cout << "cig: ";
+ 	BOOST_FOREACH(CIGAR::CIGOp cop, cig) {
+ 		cout << "(" << cop.first << "," << cop.second << ")" ;
+ 	}
+ 	cout << endl;
+	*/
+ 	return cig;
+}
+
+
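+// getReads: fetches the reads overlapping the window [leftPos, rightPos] from all BAM files,
+// reusing reads cached in readBuffer from the previous (sorted) window, pairs mates by query
+// name, and filters on mapping quality, read length and overlap with the window; filtered
+// reads get their mapQual set to -1 and are dropped below.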
+void DetInDel::getReads(uint32_t leftPos, uint32_t rightPos, vector<Read> & reads, uint32_t & oldLeftPos, uint32_t & oldRightFetchReadPos, vector<Read *> & readBuffer, bool reset)
+{
+	// filter using map quality
+	class SortFunc {
+	public:
+		static bool sortFunc(const Read & r1, const Read & r2)
+		{
+			// sort in decreasing order
+			if (r1.mapQual>r2.mapQual) return true; else return false;
+		}
+	};
+
+	if (leftPos<oldLeftPos) {
+		cerr << "Windows are not sorted!" << endl;
+		exit(3);
+	}
+
+	reads.clear();
+
+	if (int(rightPos-leftPos)<3*params.minReadOverlap) throw string("Choose a larger width or a smaller minReadOverlap.");
+
+
+	int maxDev = int ( libraries.getMaxInsertSize());
+
+	//maxDev = 100;
+	//cerr << "CHANGE THIS CHANGE THIS" << endl;
+
+	string_hash< list<int> > mapped_name_to_idx, unmapped_name_to_idx; // query name to read idx
+	string_hash< list<int> >::const_iterator hash_it;
+
+	int numUnknownLib = 0;
+	string_hash <int> unknownLib;
+	const int LEFTPAD = 200;
+	// note: the idea is to only consider reads starting at or after leftMostReadPos (and not ones merely overlapping);
+	// LEFTPAD should take care of overlap effects (leftMostReadPos is already generous, based on the library insert size)
+
+
+	uint32_t rightFetchReadPos = rightPos+maxDev;
+	uint32_t rightMostReadPos = rightPos+maxDev;
+
+	uint32_t leftFetchReadPos = leftPos-maxDev-LEFTPAD;
+	uint32_t leftMostReadPos = leftPos-maxDev-LEFTPAD; // left most position of reads we want to seriously consider
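+	// e.g. with leftPos=10000, maxDev=500 and LEFTPAD=200 the fetch/keep window for reads becomes [9300, rightPos+500]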
+
+	// reset indicates whether we want to remake the readBuffer
+
+	vector<Read*> newReadBuffer;
+	bool leftOverlapsPrevious = false;
+	if (reset) {
+		for (size_t r=0;r<readBuffer.size();r++) {
+			if (readBuffer[r]!=NULL) delete readBuffer[r];
+		}
+		readBuffer.clear();
+		oldRightFetchReadPos = rightFetchReadPos;
+	} else {
+		// clear reads that do not overlap with new window [leftMostReadPos, rightMostReadPos]
+
+		for (size_t r=0;r<readBuffer.size();r++) {
+			uint32_t rend = readBuffer[r]->getEndPos();
+			uint32_t rbeg = readBuffer[r]->getBam()->core.pos;
+			if (rbeg<leftMostReadPos) {
+				delete readBuffer[r];
+				readBuffer[r]=NULL;
+			} else {
+				newReadBuffer.push_back(readBuffer[r]);
+			}
+		}
+
+		// note that it is required that the new leftPos of the window >= the old leftPos;
+		// therefore if leftMostReadPos < oldRightFetchReadPos the new window overlaps the region
+		// that was already fetched, and we only need to fetch reads from oldRightFetchReadPos onwards
+		if (leftMostReadPos<oldRightFetchReadPos) {
+			leftFetchReadPos = oldRightFetchReadPos;
+			leftOverlapsPrevious = true;
+		}
+	}
+
+
+
+
+	// cout << "leftFetchReadPos: " << leftFetchReadPos << " rightFetchReadPos: " << rightFetchReadPos << " oldRightFetchReadPos: " << oldRightFetchReadPos << endl;
+	// cout << "leftMostReadPos: " << leftMostReadPos << " rightMostReadPos: " << rightMostReadPos << " leftOverlapsPrevious: " << int(leftOverlapsPrevious) << endl;
+	// store updated readBuffer
+	readBuffer.swap(newReadBuffer);
+
+//	cout << "leftPos : " << leftPos << " rightPos: " << rightPos << " maxDev: " << maxDev << endl;
+
+
+	// first clean readbuffer
+
+
+	int numReads = readBuffer.size();
+
+	vector<Read> newReads;
+	if (leftFetchReadPos<=rightFetchReadPos) {
+		cout << "Fetching reads...." << endl;
+		for (size_t b=0;b<myBams.size();b++) {
+			//bam_fetch(myBams[b].bf, myBams[b].idx, myBams[b].getTID(params.tid), leftPos+params.minReadOverlap, rightPos-params.minReadOverlap, &reads, &Read::fetchFuncVector);
+			Read::FetchReadData data(&newReads, int(b), &(this->libraries), &myBams, numReads, params.maxReads*100);
+			bam_fetch(myBams[b]->bf, myBams[b]->idx, myBams[b]->getTID(params.tid), leftFetchReadPos , rightFetchReadPos,&data , &Read::fetchFuncVectorPooled);
+			numUnknownLib += data.numUnknownLib;
+			numReads = data.numReads;
+		}
+		oldRightFetchReadPos = rightFetchReadPos;
+	}
+
+	// add new reads to readBuffer
+
+	for (size_t r=0;r<newReads.size();r++) {
+		if (newReads[r].getBam()->core.pos>=leftFetchReadPos) {
+			// only store reads that do not overlap with the boundary;
+			// reads overlapping with boundary will have been picked up before.
+			readBuffer.push_back(new Read(newReads[r]));
+		}
+	}
+
+	if (0) {
+		// check with regular fetch
+		vector<Read> tmpReads;
+		for (size_t b=0;b<myBams.size();b++) {
+			//bam_fetch(myBams[b].bf, myBams[b].idx, myBams[b].getTID(params.tid), leftPos+params.minReadOverlap, rightPos-params.minReadOverlap, &reads, &Read::fetchFuncVector);
+			Read::FetchReadData data(&tmpReads, int(b), &(this->libraries), &myBams, numReads, params.maxReads*100);
+			bam_fetch(myBams[b]->bf, myBams[b]->idx, myBams[b]->getTID(params.tid), leftMostReadPos , rightMostReadPos,&data , &Read::fetchFuncVectorPooled);
+		}
+		for (size_t r=0;r<tmpReads.size();r++) {
+			if (tmpReads[r].getBam()->core.pos>=leftMostReadPos) {
+				string qname = string(bam1_qname(tmpReads[r].getBam()));
+				cout << "glp: "  << leftPos << " qname: " << qname << " pos: " << tmpReads[r].pos << " end: " << tmpReads[r].getEndPos() << endl;
+			}
+		}
+	}
+
+
+
+	// check readbuffer for duplicates (debugging)
+	if (1) {
+		string_hash <int> qnameCount;
+		for (size_t r=0;r<readBuffer.size();r++) {
+			string qname = string(bam1_qname(readBuffer[r]->getBam()));
+			//cout << "lp: "  << leftPos << " qname: " << qname << " pos: " << readBuffer[r]->pos << " end: " << readBuffer[r]->getEndPos() << endl;
+			string_hash<int>::iterator it = qnameCount.find(qname);
+			if (it == qnameCount.end()) {
+				qnameCount[qname]=1;
+			} else {
+				qnameCount[qname]++;
+				if (qnameCount[qname]>2) {
+					cerr << "Duplicate reads: readbuffer problem!" << endl;
+					throw string("duplicate reads!");
+				}
+			}
+		}
+	}
+
+
+
+	newReads.clear();
+
+	size_t oldNumReads=readBuffer.size();
+
+
+	// copy readBuffer to reads
+
+	for (size_t r=0;r<readBuffer.size();r++) {
+		reads.push_back(Read(*readBuffer[r]));
+	}
+
+
+	// get query names
+
+	vector<int> unmapped;
+	for (size_t r=0; r<reads.size();r++) {
+		if (reads[r].isUnmapped())  {
+			unmapped.push_back(r);
+			unmapped_name_to_idx[ string(bam1_qname(reads[r].getBam())) ].push_back(r);
+		//	 cout << " __reads[" << r  << "]: " << bam1_qname(reads[r].getBam()) << " UNMAPPED" << endl;
+		} else {
+			mapped_name_to_idx[ string(bam1_qname(reads[r].getBam())) ].push_back(r);
+		//	cout << " __reads[" << r  << "]: " << bam1_qname(reads[r].getBam()) << " pos: " << reads[r].pos << " " << reads[r].getBam()->core.pos << " mpos: " << reads[r].getBam()->core.mpos << " mu: " << reads[r].mateIsUnmapped() << endl;
+		}
+	}
+
+
+	// filter reads based on haplotype window, the minimum read overlap for mapped reads, minimum mapping quality and maximum read length
+
+	/*
+	cout << "name_to_idx.size: " << mapped_name_to_idx.size() << endl;
+	for (hash_it = mapped_name_to_idx.begin();hash_it!=mapped_name_to_idx.end();hash_it++) {
+		cout << "hit: " << hash_it->first;
+		BOOST_FOREACH(int x, hash_it->second) {
+			cout << " " << x;
+		}
+		cout << endl;
+	}
+	*/
+	int numTIDmismatch = 0, numOrphan =0, numOrphanUnmapped = 0, numInRegion = 0;
+
+	// reads are filtered by setting mapping quality to -1
+	vector<Read> filteredReads;
+	double minMapQual = params.mapQualThreshold;
+	if (minMapQual<0.0) minMapQual=0.0;
+	for (int r=0;r<int(reads.size());r++) {
+		//cout << "***" << endl;
+		//cout << "reads.mapQual " << reads[r].mapQual <<  " bq: " << reads[r].getBam()->core.qual << endl;
+		bool filter = false;
+		int tf = 0;
+		if (reads[r].size()>params.maxReadLength) filter=true;
+
+		if (reads[r].getEndPos()<leftMostReadPos || reads[r].pos>rightMostReadPos) filter=true;
+
+		if (!reads[r].isUnmapped()) {
+	//		cout << "mapped" << endl;
+			if (reads[r].pos+int(reads[r].size())<int(leftPos)+params.minReadOverlap || reads[r].pos>int(rightPos)-params.minReadOverlap) {
+		//		cout << " { " << reads[r].pos+reads[r].size() << " " << leftPos+params.minReadOverlap << " " << reads[r].pos << " " << rightPos-params.minReadOverlap << " } " << endl;
+				filter=true;
+				tf = 1;
+			} else if (reads[r].mateIsUnmapped() == false ){
+				if (reads[r].getBam()->core.mtid != reads[r].getBam()->core.tid) {
+					// filter = true;
+					// cout << "TIDERR: reads[" << r << "]: " << bam1_qname(reads[r].getBam()) << " matePos: " << reads[r].matePos << " mateLen: " << reads[r].mateLen << endl;
+					numTIDmismatch++;
+				} else {
+					// find mate of mapped read
+					// filter if we cannot find it (mapped to another chromosome, those are a bit suspicious)
+
+					// lookup mapped read
+					hash_it = mapped_name_to_idx.find(string(bam1_qname(reads[r].getBam())));
+					if (hash_it == mapped_name_to_idx.end()) { numOrphan++; filter=true; } else {
+						if (hash_it->second.size()>2) cerr << "HUH? DUPLICATE READ LABELS???" << endl;
+						if (reads[r].mateIsUnmapped() == false) {
+							filter = true;
+						}
+
+						BOOST_FOREACH(int idx, hash_it->second) {
+							if (idx != r) {
+								reads[r].mateLen = reads[idx].size();
+								reads[r].matePos = reads[idx].pos;
+								filter = false;
+								if (reads[r].matePos != reads[r].getBAMMatePos()) {
+									cerr << "matepos inconsistency!" << endl;
+									cerr << reads[r].matePos << " " << reads[r].getBAMMatePos() << endl;
+									exit(1);
+								}
+							}
+						}
+
+						if (filter == true) {
+							numOrphan++;
+							tf = 2;
+						}
+						//cout << "mapped read: " << r << " " << qname << " pos: " << reads[r].pos << " " << reads[r].getBam()->core.mtid << " " <<  reads[r].getBam()->core.mpos << " mateunmap: " << reads[r].mateIsUnmapped() << endl;
+					}
+				}
+			} else if (reads[r].mateIsUnmapped() == true) {
+				reads[r].matePos=reads[r].pos;
+				hash_it = unmapped_name_to_idx.find(string(bam1_qname(reads[r].getBam())));
+				if (hash_it == unmapped_name_to_idx.end()) { filter=true; } else {
+					filter = true;
+					if (hash_it->second.size()>2) cerr << "HUH? DUPLICATE READ LABELS???" << endl;
+					BOOST_FOREACH(int idx, hash_it->second) {
+						if (idx != r) {
+							reads[r].mateLen = reads[idx].size();
+							filter = false;
+						}
+					}
+
+				}
+				if (filter==true) {
+					numOrphan++;
+					tf = 3;
+				}
+			}
+			if (filter == false) numInRegion++;
+
+		} else {
+	//		cout << " unmapped" << endl;
+			// read is unmapped
+			if (params.mapUnmappedReads) {
+	//			cout << " unmapped " << qname << " ; ";
+
+				// lookup mapped read
+				hash_it = mapped_name_to_idx.find(string(bam1_qname(reads[r].getBam())));
+				int idx;
+				if (hash_it == mapped_name_to_idx.end()) { numOrphanUnmapped++; filter=true; tf = 4;} else {
+					if (hash_it->second.size()!=1) {
+						cerr << "UNMAPPED READ HAS MORE THAN ONE MATE!" << endl;
+						exit(1);
+					}
+					idx = *(hash_it->second.begin());
+					//cout << " FOUND " << idx << endl ;
+					// check if mate will overlap with haplotype
+					uint32_t range_l, range_r; // range of mate
+
+					int maxInsert = (int) reads[idx].getLibrary().getMaxInsertSize();
+					int minInsert = 0;
+					uint32_t rpos = reads[idx].pos;
+
+	//				cout << "idx: " << idx << " unmapped: " << reads[idx].isUnmapped() << " rpos: " << rpos << " isreverse: " << reads[idx].isReverse() << endl;
+					if (reads[idx].isReverse()) {
+						range_l = rpos-maxInsert;
+						range_r = rpos-minInsert;
+					} else {
+						range_l = rpos+minInsert;
+						range_r = rpos+maxInsert;
+					}
+
+	//				cout << "insert: " << insert << " std: " << std << " range_l : " << range_l << " range_r: " << range_r << " leftPos: " << leftPos << "  rightPos: " << rightPos << endl;
+
+					if (range_r>leftPos && range_l<rightPos) {
+						numInRegion++;
+						filter=false;
+						reads[r].mapQual = reads[idx].mapQual;
+						reads[r].matePos = reads[idx].pos;
+						reads[r].mateLen = reads[idx].size();
+						if (reads[r].isReverse() == reads[idx].isReverse()) {
+							reads[r].reverse();
+							reads[r].complement();
+						}
+					} else {
+						filter=true;
+						tf = 5;
+					}
+				}
+			} else {
+				filter = true;
+			}
+
+		}
+		if (filter == true) reads[r].mapQual = -1.0;
+
+//		cout << "reads[" << r << "]: " << bam1_qname(reads[r].getBam()) << " matePos: " << reads[r].matePos << " mateLen: " << reads[r].mateLen << " Filter: " << tf << " filter: " << filter <<  " mq: " << reads[r].mapQual << endl;
+	}
+
+
+	int nUnmapped = 0;
+	int nMateposError = 0;
+	sort(reads.begin(), reads.end(), SortFunc::sortFunc);
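+	// reads are now sorted by decreasing mapping quality; keep at most params.maxReads reads that
+	// pass the mapping-quality threshold (reads filtered above were assigned mapQual = -1)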
+	size_t max; for (max=0;max<params.maxReads && max<reads.size();max++) if (!(reads[max].mapQual<minMapQual)) {
+		if (reads[max].matePos==-1 && reads[max].isPaired() && !reads[max].mateIsUnmapped() ) {
+			nMateposError++;
+			reads[max].matePos = reads[max].pos;
+		};
+		filteredReads.push_back(Read(reads[max]));
+		if (reads[max].isUnmapped()) nUnmapped++;
+	} else break;
+
+
+	filteredReads.swap(reads);
+	filteredReads.clear();
+	//Read::filterReads(reads, params.maxReads, params.mapQualThreshold, params.maxReadLength, params.minReadOverlap, leftPos, rightPos);
+
+	if (params.filterReadAux!="") {
+		if (params.filterReadAux.size()>1) {
+			size_t before=reads.size();
+			int exclude=1;
+			if (params.filterReadAux[0]=='+') exclude=0;
+			string match=params.filterReadAux.substr(1,params.filterReadAux.size());
+			Read::filterReads(reads, exclude, match);
+			size_t after=reads.size();
+			if (!params.quiet) cout << "filterAux: " << before-after << " reads were filtered based on match string " << params.filterReadAux << endl;
+		}
+	}
+
+	if (!params.quiet) cout << "Number of reads: " << reads.size() << " out of " << oldNumReads << " # unmapped reads: " << nUnmapped << " numReadsUnknownLib: " << numUnknownLib << " numChrMismatch: " << numTIDmismatch << " numMappedWithoutMate: " << numOrphan << " numUnmappedWithoutMate: " << numOrphanUnmapped << endl;
+	if (nMateposError) {
+		cerr << "The mate position of " << nMateposError << " reads was recorded as -1 in the BAM file" << endl;
+	}
+
+	if (params.showReads) {
+		for (size_t r=0;r<reads.size();r++) {
+			cout << "read[" << r << "]: " << reads[r] << endl;
+		}
+	}
+
+	if (reads.size()<2) {
+		throw string("too_few_reads");
+	} else if (reads.size()>=params.maxReads) {
+		throw string("above_read_count_threshold");
+	}
+
+}
+
+
+void DetInDel::detectIndels(const string & variantsFileName)
+{
+
+	ofstream output;
+	ofstream glfOutput;
+
+	string callsFile=params.fileName; callsFile.append(".calls.txt");
+	string glfFile=params.fileName; glfFile.append(".glf.txt");
+
+	OutputData oData=params.makeOutputData(output);
+
+	/*
+	output.open(callsFile.c_str());
+	if (!output.is_open()) {
+		throw(string("Cannot open file ").append(callsFile).append(" for writing."));
+	}
+
+	oData.outputLine(oData.headerString());
+	*/
+
+
+	glfOutput.open(glfFile.c_str());
+	if (!glfOutput.is_open()) {
+		throw(string("Cannot open file ").append(glfFile).append(" for writing."));
+	}
+
+	OutputData glfData=params.makeGLFOutputData(glfOutput);
+	glfData.outputLine(glfData.headerString());
+
+	VariantFile vf(variantsFileName);
+
+	int index=0;
+	//for (map<uint32_t,InDel>::const_iterator it=indels.begin();it!=indels.end();it++, cnt++) {
+
+	vector<Read *> readBuffer;
+	uint32_t oldLeftPos=0, oldRightFetchReadPos=0;
+
+
+	string oldTid("-1");
+
+	// NOTE ReadBuffer should be reset on first usage or on chromosome change!
+	bool resetReadBuffer = true;
+
+
+
+	while (!vf.eof()) {
+		AlignedCandidates candidateVariants;
+		candidateVariants=vf.getLineVector(params.varFileIsOneBased);
+		if (candidateVariants.variants.size()==0) continue;
+
+		vector<Read> reads;
+
+		uint32_t pos, leftPos, rightPos;
+		// get lowest and highest position
+
+		//leftPos=(candidateVariants.leftPos>int(params.width))?(candidateVariants.leftPos-params.width):0;
+		//rightPos=(candidateVariants.rightPos+params.width-1);
+		leftPos = candidateVariants.leftPos;
+		rightPos = candidateVariants.rightPos;
+
+
+		pos = candidateVariants.centerPos;
+		params.tid=candidateVariants.tid;
+
+		if (params.tid!=oldTid) {
+			// reinit
+			resetReadBuffer = true;
+			oldTid = params.tid;
+			oldLeftPos = 0;
+		}
+
+		if (leftPos < oldLeftPos) {
+			cerr << "leftPos: " << leftPos << " oldLeftPos: " << oldLeftPos << endl;
+			cerr << "Candidate variant files must be sorted on left position of window!" << endl;
+			exit(1);
+		}
+
+
+// TODO either add tid to AlignedVariant or infer it from the vector of aligned variants
+// change alige
+		index++;
+		bool skipped = false;
+
+		if (!params.quiet) cout << "****" << endl << " tid: " << params.tid << " pos: " << pos << " leftPos: " << leftPos << " " << " rightPos: " << rightPos << endl;
+
+		string message="ok";
+		/*
+		if (!(indels[pos].count[0]>=params.minCount || indels[pos].count[1]>=params.minCount)) {
+			message="below_indel_count_threshold";
+			skipped=true;
+			goto _end;
+		}
+		*/
+
+
+
+		try {
+			getReads(leftPos, rightPos, reads, oldLeftPos, oldRightFetchReadPos, readBuffer, resetReadBuffer);
+
+
+			if (params.inferenceMethod=="empirical") empiricalDistributionMethod(index, reads, pos, leftPos, rightPos,  candidateVariants, oData, glfData);
+			else throw string("Unknown inference method");
+
+		}
+		catch (string s) {
+			for (size_t x=0;x<s.size();x++) if (s[x]==' ') s[x]='_';
+			message=string("error_").append(s);
+			skipped=true;
+			goto _end;
+		}
+		catch (const std::bad_alloc &) {
+			message = string("error_bad_alloc");
+			skipped = true;
+			goto _end;
+		}
+		catch (std::exception& e) {
+			message = string("error_exception_").append(e.what());
+			skipped = true;
+			goto _end;
+		}
+
+
+		_end:
+
+		if (skipped) {
+			cerr << "skipped " << params.tid << " " << pos << " reason: " << message << endl;
+			//OutputData::Line line(oData);
+			//line.set("msg", message);
+			//line.set("index", index);
+			//oData.output(line);
+
+			OutputData::Line gline(glfData);
+			gline.set("msg", message);
+			gline.set("index", index);
+			gline.set("tid", params.tid);
+			gline.set("lpos", leftPos);
+			gline.set("rpos", rightPos);
+			glfData.output(gline);
+
+			// reset read buffer: all reads will be fetched again
+			resetReadBuffer = true;
+		} else {
+			resetReadBuffer = false;
+		}
+
+		oldLeftPos = leftPos;
+	}
+
+	//output.close();
+	glfOutput.close();
+
+	// clean up read buffer
+	for (size_t r=0;r<readBuffer.size();r++) {
+		if (readBuffer[r]!=NULL) delete readBuffer[r];
+	}
+
+
+}
+
+
+
+
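+// alignHaplotypes: aligns every candidate haplotype against the reference sub-sequence of the
+// window, records the indels and SNPs implied by each alignment in `variants`, and drops
+// haplotypes whose first or last base is not aligned within the reference window (LO/RO).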
+bool DetInDel::alignHaplotypes(vector<Haplotype> & haps,  uint32_t pos, uint32_t & leftPos, uint32_t & rightPos, map<int, std::set<AlignedVariant> > & variants)
+{
+	uint32_t start=leftPos;
+	uint32_t end=rightPos+1;
+
+	variants.clear();
+
+	int print=0;
+
+	seqan::Score<int> score(-1, -460, -100,-960);
+
+	Read rh1;
+	rh1.pos=0;
+	rh1.posStat.first=0;
+	rh1.mapQual=1.0-1e-32;
+	ObservationModelParameters alignParams("probabilistic");
+	alignParams.pError=0.0001;
+	alignParams.pMut=0.01;
+	alignParams.maxLengthDel=50;
+	alignParams.forceReadOnHaplotype=true;
+	alignParams.bMid=0;
+	//alignParams.maxLengthIndel=12;
+	//alignParams.numIndels=2;
+	//alignParams.indelDist="uniform";
+
+	vector<Haplotype> tmp_haps;
+	for (size_t h=0;h<haps.size();h++) {
+		rh1.seq.seq=haps[h].seq;
+		rh1.setAllQual(1.0-1e-16);
+
+		Haplotype hRef;
+		uint32_t start=leftPos;
+		uint32_t end=rightPos;
+
+		string refSeq=getRefSeq(start+1, end+1);
+
+
+
+
+		hRef.append(refSeq);
+		/*
+		char lc = (haps[h].seq[haps[h].seq.size()-1]);
+		char lcl;
+		if (lc == 'T') lcl = 'A'; else if (lc == 'A') lcl = 'T'; else if (lc=='G') lcl = 'C'; else if (lc=='C') lcl = 'G';
+
+		hRef.seq+= lcl;
+		*/
+		/*
+		ObservationModelFBMax om(hRef, rh1, 0, alignParams);
+		*/
+		ObservationModelSeqAn om(hRef, rh1, 0, alignParams, score);
+		haps[h].indels.clear();
+		haps[h].snps.clear();
+		//om.reportVariants(haps[h].indels, haps[h].snps, haps[h].align);
+		//om.calcLikelihood();
+		om.align();
+		const MLAlignment & ml=om.getMLAlignment();
+		haps[h].indels=ml.indels;
+		haps[h].snps=ml.snps;
+		haps[h].align=ml.align;
+		haps[h].ml=ml;
+		bool hasStartEndIndel = false;
+		if (ml.hpos[0] == MLAlignment::LO) hasStartEndIndel = true;
+		int hs = ml.hpos.size()-1;
+		if (hs>0 && ml.hpos[hs] == MLAlignment::RO) hasStartEndIndel = true;
+		//if (params.showCandHap) {
+	//			cout << "hap " << h << endl;om.printAlignment(20);
+	//			cout << string(20,' ') << haps[h].align << endl;
+	//	}
+
+		for (map<int, AlignedVariant>::const_iterator it=haps[h].indels.begin(); it!=haps[h].indels.end();it++) variants[it->first].insert(it->second);
+		for (map<int, AlignedVariant>::const_iterator it=haps[h].snps.begin(); it!=haps[h].snps.end();it++) variants[it->first].insert(it->second);
+		if (!hasStartEndIndel) {
+			tmp_haps.push_back(haps[h]);
+		}
+
+	}
+
+	haps.swap(tmp_haps);
+
+	// add REF allele as variant to each haplotype in order to compute coverage statistics
+	for (map<int, std::set<AlignedVariant> >::const_iterator it=variants.begin();it!=variants.end();it++) {
+		for (size_t h=0;h<haps.size();h++) haps[h].addRefVariant(it->first);
+	}
+
+	if (!params.quiet) {
+		for (map<int, std::set<AlignedVariant> >::const_iterator it=variants.begin();it!=variants.end();it++) {
+			cout << "aligned_var at pos " << pos << " " << leftPos+it->first;
+			BOOST_FOREACH(AlignedVariant av, it->second) {
+				cout << " " << av.getString();
+			}
+			cout << endl;
+		}
+	}
+
+
+	return true;
+}
+
+bool DetInDel::getHaplotypes(vector<Haplotype> &haps, const vector<Read> & reads,uint32_t pos, uint32_t & leftPos, uint32_t & rightPos, const AlignedCandidates & candidateVariants)
+{
+
+
+	uint32_t rs=(int(leftPos)>params.minReadOverlap)?(leftPos-params.minReadOverlap):0;
+	uint32_t re=rightPos+params.minReadOverlap;
+	string refSeq=getRefSeq(rs+1, re+1);
+
+	HaplotypeDistribution hd(pos, refSeq, rs);
+
+	// infer empirical haplotype distribution from WS alignment
+	for (size_t r=0;r<reads.size();r++) {
+		hd.insertRead(reads[r].getBam());
+	}
+	hd.setFrequencies();
+
+	haps.clear();
+	vector<Variant> indelVariants;
+
+	/*
+	if (!(candidateVariants.size()>0 && params.checkAllCIGARs==0)) {
+		indelVariants=hd.getIndelVariantsAtMidPos();
+	}
+
+
+	// add any prespecified variants
+	//indelVariants.insert(indelVariants.begin(), variants.begin(), variants.end());
+	*/
+	if (!params.quiet) {
+		cout << "candidate_var at pos: " << pos ;
+		BOOST_FOREACH(AlignedVariant v, candidateVariants.variants) {
+			cout << " " << v.getStartHap() << "," << v.getString();
+		}
+		cout << endl;
+	}
+
+
+	// get haplotypes from empirical distribution
+
+	try {
+		HDIterator2 hdi(hd, params.maxHap, pos, leftPos, rightPos, params.noIndelWindow);
+
+		double logNumHaps=hdi.getLogNumHaps();
+		if (logNumHaps>log(params.skipMaxHap)) {
+			cerr << "tid: " << params.tid << " pos: " << pos << " too many haplotypes [>exp(" << logNumHaps << ")]" << endl;
+			return true;
+		}
+
+		//hdi.generateHapsWithIndels(haps, indels);
+		vector<Haplotype> tmp_haps;
+		hdi.generateHapsWithAlignedVariants(haps, candidateVariants, 0, params.changeINStoN);
+
+
+	
+
+		if (haps.size()>params.skipMaxHap || haps.size()*reads.size()>params.maxHapReadProd) {
+			cerr << "tid: " << params.tid << " pos: " << pos << " too many haplotypes [>" << haps.size() << "]" << " numreads: " << reads.size() << endl;
+			return true;
+		}
+
+		if (params.showHapDist) {
+			cout << endl << "Empirical distribution: " << endl;
+			cout << hdi << endl;
+		}
+
+		leftPos=hdi.start();
+		rightPos=hdi.end();
+
+
+		
+
+		map<int, std::set<AlignedVariant> > variants;
+		alignHaplotypes(haps,pos, leftPos, rightPos, variants);
+
+		// remove duplicate reference-haplotypes of different length
+		bool foundRef = false;
+		for (size_t th=0;th<haps.size();th++) {
+			const Haplotype & hap=haps[th];
+			int num_indels =  hap.countIndels();
+			int num_snps = hap.countSNPs();
+			if (num_indels == 0 && num_snps == 0) {
+				
+
+				if (!foundRef) {
+					tmp_haps.push_back(Haplotype(haps[th]));
+					foundRef = true;
+				}
+			} else {
+				tmp_haps.push_back(Haplotype(haps[th]));
+			}
+		}
+		/*
+		if (params.showCandHap) {
+			for (size_t i=0;i<haps.size();i++) {
+				cout << "PRE FILTER hdi[" << i << "]:" << haps[i] << endl;
+			}
+		}
+		*/
+
+		typedef map<int, AlignedVariant>::const_iterator It;
+		haps.swap(tmp_haps);
+
+		int nh=0;
+		if (params.showCandHap) {
+			for (size_t i=0;i<haps.size();i++) {
+				cout << "POSTFILTER hdi[" << nh++ << "]:" << haps[i] << endl;
+			}
+		}
+	}
+	catch (string s) {
+		if (s=="Blocks are not consecutive.") {
+			cerr << "tid: " << params.tid << " pos: " << pos << " " << s << endl;
+			//return true;
+			throw string("hapblock");
+		} else {
+			throw string(s);
+		}
+	}
+	return false;
+}
+
+
+
+
+/*
+// HaplotypeDistribution method
+void DetInDel::computeLikelihoods(const vector<Haplotype> &haps, const vector<Read> & reads, vector<vector<MLAlignment> > & liks, uint32_t leftPos, uint32_t rightPos, vector<int> & onHap)
+{
+//	cout << "Computing likelihoods for all reads and haplotypes.\n";
+	map<size_t, vector<size_t> > hapSizeToIdx;
+	onHap = vector<int>(reads.size(),0); // records whether a read was aligned onto at least one haplotype
+
+	typedef map<size_t, vector<size_t> >::const_iterator hapsCIt;
+
+	for (size_t x=0;x<haps.size();x++) {
+		hapSizeToIdx[haps[x].size()].push_back(x);
+	}
+
+	liks=vector<vector<MLAlignment> >(haps.size(),vector<MLAlignment>(reads.size()));
+
+	for (hapsCIt it=hapSizeToIdx.begin();it!=hapSizeToIdx.end();it++) {
+		const vector<size_t> hapIdx=it->second;
+		// setup observation models for all the reads
+		vector<ObservationModelFBMaxErr> oms; oms.reserve(reads.size());
+		for (size_t r=0;r<reads.size();r++) {
+			const Haplotype & hap=haps[hapIdx[0]];
+			oms.push_back(ObservationModelFBMaxErr(hap, reads[r], leftPos, params.obsParams));
+		}
+
+		for (size_t h=0;h<hapIdx.size();h++) {
+			size_t hidx=hapIdx[h];
+			const Haplotype & hap=haps[hidx];
+
+//			cout << "Haplotype[" << hidx << "]: " << endl;
+
+
+			for (size_t r=0;r<reads.size();r++) {
+				oms[r].changeHaplotype(hap);
+				liks[hidx][r]=oms[r].calcLikelihood();
+				if (!liks[hidx][r].offHapHMQ) onHap[r]=1;
+//				cout << "[" << r << ": " << liks[hidx][r].ll << "] ";
+				if (liks[hidx][r].ll>0.1) {
+				  ObservationModelFBMaxErr om(hap, reads[r], leftPos, params.obsParams);
+				 liks[hidx][r]=om.calcLikelihood();
+				 cout << string(25,' ') << hap.seq << endl;
+				  om.printAlignment(25);
+				  cout << "h: " << h << " r: " << r << endl;
+				  cout << bam1_qname(reads[r].getBam()) << endl;
+				  cerr << "Likelihood>0" << endl;
+				  exit(1);
+				}
+				if (isnan(liks[hidx][r]) || isinf(liks[hidx][r])) {
+					cout << "NAN/Inf error" << endl;
+					throw string("Nan detected");
+				}
+			}
+	//		cout << endl;
+		}
+	}
+}
+*/
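+// For every (haplotype, read) pair, compute the maximum-likelihood alignment with
+// ObservationModelFBMaxErr; onHap[r] is set to 1 if read r aligns on-haplotype
+// (i.e. is not flagged offHapHMQ) for at least one haplotype.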
+void DetInDel::computeLikelihoods(const vector<Haplotype> &haps, const vector<Read> & reads, vector<vector<MLAlignment> > & liks, uint32_t leftPos, uint32_t rightPos, vector<int> & onHap)
+{
+//	cout << "Computing likelihoods for all reads and haplotypes.\n";
+	onHap = vector<int>(reads.size(),0); // records whether a read was aligned onto at least one haplotype
+
+	typedef map<size_t, vector<size_t> >::const_iterator hapsCIt;
+
+	liks=vector<vector<MLAlignment> >(haps.size(),vector<MLAlignment>(reads.size()));
+	for (size_t hidx=0;hidx<haps.size();hidx++) {
+		const Haplotype & hap=haps[hidx];
+		for (size_t r=0;r<reads.size();r++) {
+			ObservationModelFBMaxErr oms(hap, reads[r], leftPos, params.obsParams);
+			liks[hidx][r]=oms.calcLikelihood();
+			if (!liks[hidx][r].offHapHMQ) onHap[r]=1;
+//				cout << "[" << r << ": " << liks[hidx][r].ll << "] ";
+			if (liks[hidx][r].ll>0.1) {
+				ObservationModelFBMaxErr om(hap, reads[r], leftPos, params.obsParams);
+				liks[hidx][r]=om.calcLikelihood();
+				cout << string(25,' ') << hap.seq << endl;
+				om.printAlignment(25);
+				cout << "hidx: " << hidx << " r: " << r << endl;
+				cout << bam1_qname(reads[r].getBam()) << endl;
+				cerr << "Likelihood>0" << endl;
+				exit(1);
+			}
+			if (isnan(liks[hidx][r]) || isinf(liks[hidx][r])) {
+				cout << "NAN/Inf error" << endl;
+				throw string("Nan detected");
+			}
+		}
+//		cout << endl;
+	}
+}
+
+
+void DetInDel::computeHapPosition(const Haplotype & hap, const Read & read, vector<int> & alPos, int leftPos)
+{
+	// map the read's reference-based (BAM) alignment onto the haplotype: derive candidate start and end positions on the haplotype from the aligned positions of the read's first and last aligned bases
+
+	const bam1_t *b=read.getBam();
+	const bam1_core_t *c=&b->core;
+	uint32_t* cigar=bam1_cigar(b);
+	int k, end, start;
+	end = c->pos;
+
+	int offs=0, l=0, lend=0;	// offs: offset due to a soft clip at the beginning of the read
+							// lend: read base at which 'end' was last advanced (there may be a soft clip at the end of the read)
+
+	bool al=false;
+	for (k = 0; k < (int) c->n_cigar; ++k) {
+		int op = cigar[k] & BAM_CIGAR_MASK;
+
+		if (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CREF_SKIP) al=true;
+
+		if (!al && op == BAM_CSOFT_CLIP) offs += cigar[k] >> BAM_CIGAR_SHIFT;
+
+		if (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP)
+					l += cigar[k] >> BAM_CIGAR_SHIFT;
+
+		if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP) {
+			end += cigar[k] >> BAM_CIGAR_SHIFT;
+			lend=l;
+		}
+	}
+
+	start=c->pos-leftPos; // make relative to alignment of haplotypes to reference
+	end-=leftPos;
+
+	// lookup start and end in alignment of haplotype to reference
+
+	for (int x=0;x<int(hap.ml.hpos.size());x++) if (hap.ml.hpos[x]==start) {
+		alPos.push_back(hap.ml.hpos[x]-offs);
+		break;
+	}
+
+	for (int x=int(hap.ml.hpos.size())-1;x>=0;x--) if (hap.ml.hpos[x]==end) {
+		alPos.push_back(hap.ml.hpos[x]-lend);
+		break;
+	}
+
+
+}
+
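+// Faster variant of computeLikelihoods: candidate alignment positions on each haplotype are first
+// guessed from the read's reference (BAM) alignment via computeHapPosition, and a k-mer (k=4) hash
+// of the haplotype (HapHash) is then used by ObservationModelS::align to score the read.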
+void DetInDel::computeLikelihoodsFaster(const vector<Haplotype> &haps, const vector<Read> & reads, vector<vector<MLAlignment> > & liks, uint32_t leftPos, uint32_t rightPos, vector<int> & onHap)
+{
+	liks.clear();
+	liks=vector<vector<MLAlignment> >(haps.size(),vector<MLAlignment>(reads.size()));
+	onHap = vector<int>(reads.size(),0); // records whether a read was aligned onto at least one haplotype
+
+	const unsigned int kmer=4;
+
+	for (size_t h=0;h<haps.size();h++) {
+		const Haplotype & hap = haps[h];
+		//cout << "Haplotype[" << h << "]: " << endl;
+		HapHash hash(kmer, hap);
+		for (size_t r=0;r<reads.size();r++) {
+			// given BWA alignment of read to reference, estimate a number of likely alignments to the haplotype
+			vector<int> alPos;
+			computeHapPosition(hap, reads[r], alPos, leftPos);
+		//	cout << "[" << r << ": " << alPos.size() ;
+
+
+			ObservationModelS om(hap, reads[r], leftPos, params.obsParams);
+
+			// align using guessed alignments and the haplotype hash
+			liks[h][r]=om.align(hash);
+		//	cout << "," << liks[h][r].ll << "] ";
+			if (!liks[h][r].offHapHMQ) onHap[r]=1; // if on-haplotype with artificial high mapping quality
+
+			/*
+			seqan::Score<int> score(-1, -460, -100,-960);
+			ObservationModelSeqAn om2(hap, reads[r], leftPos, params.obsParams, score);
+			om2.align();
+			*/
+
+ 		}
+		//cout << "done" << endl;
+	}
+
+	// check HMQ off-haplotype reads
+
+	// realign a couple of high-mapping quality reads to obtain new candidate indels
+	// propose new set of candidate haplotypes by realigning all reads to the new set of candidate haplotypes
+
+
+	// --bamFiles /Users/caa/Documents/workspace/DInDelFastProb/bamfiles.txt --output test --region 12036338-12036340 --maxReadLength 60 --tid 17 --maxHap 8 --showEmpirical  --minReadOverlap 20 --width 60 --maxLengthIndel 10 --ref /Users/caa/data/human_b36_female.Y.fa --pError 0.0005  --maxRead 2000 --computeMAP
+}
+
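+// Returns the log prior probability for the (at most two distinct) variants av1 and av2:
+// a variant present in the candidate list with a non-negative frequency contributes log(freq),
+// otherwise the global priorSNP/priorIndel parameter for its type is used.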
+double DetInDel::getPairPrior(const AlignedVariant & av1, const AlignedVariant & av2, int leftPos, const AlignedCandidates & candidateVariants)
+{
+	std::set<AlignedVariant> vars;  vars.insert(av1); vars.insert(av2);
+	double ll = 0.0;
+	BOOST_FOREACH(AlignedVariant avar, vars) {
+		double lnf = 0.0;
+		int type = avar.getType();
+		const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+
+		if (type==Variant::SNP) lnf = log(params.priorSNP); else if (type==Variant::DEL || type==Variant::INS) lnf = log(params.priorIndel);
+		if (av==NULL) {
+			ll += lnf;
+		} else {
+			double prior = av->getFreq();
+			if (prior<0.0) ll += lnf; else ll+=log(prior);
+		}
+	}
+
+	return ll;
+
+}
+
+double DetInDel::getHaplotypePrior(const Haplotype & h1, const Haplotype & h2, int leftPos, const AlignedCandidates & candidateVariants)
+{
+	// returns log prior probability of the pair of haplotypes, based on settings in params
+	// one day maybe change prior for known SNPs
+	double ll=0.0;
+
+	// count indels
+	typedef  map <int, AlignedVariant>::const_iterator AVIt;
+	//hash_map <int, int> indels, snps;
+	std::set <AlignedVariant> indels, snps;
+	for (AVIt it=h1.indels.begin();it!=h1.indels.end();it++) if (it->second.getString().find("*REF")==string::npos && it->second.getString().find("=>")==string::npos ) {
+			//indels[it->first]=1;
+			indels.insert(it->second);
+	}
+	for (AVIt it=h2.indels.begin();it!=h2.indels.end();it++) if (it->second.getString().find("*REF")==string::npos && it->second.getString().find("=>")==string::npos ) {
+		//indels[it->first]=1;
+		indels.insert(it->second);
+	}
+
+	for (AVIt it=h1.snps.begin();it!=h1.snps.end();it++) if (it->second.getString().find("*REF")==string::npos && it->second.getString().find("=>D")==string::npos) {
+		snps.insert(it->second);
+		//snps[it->first]=1;
+	}
+	for (AVIt it=h2.snps.begin();it!=h2.snps.end();it++) if (it->second.getString().find("*REF")==string::npos && it->second.getString().find("=>D")==string::npos) {
+		snps.insert(it->second);
+		//snps[it->first]=1;
+	}
+
+	BOOST_FOREACH(AlignedVariant indel, indels) {
+	//	cout << "indel: " << indel.getString() << " " << indel.getStartHap();
+		const AlignedVariant *av = candidateVariants.findVariant(indel.getStartHap()+leftPos, indel.getType(), indel.getString());
+		if (av==NULL) {
+		//	cout << " not found. " << endl;
+			ll += log(params.priorIndel);
+		} else {
+			double prior = av->getFreq();
+		//	cout << " found: " << prior << " " << av->getStartHap() << " " << av->getFreq() << endl;
+			if (prior<0.0) ll += log(params.priorIndel); else ll+=log(prior);
+		}
+	}
+	BOOST_FOREACH(AlignedVariant indel, snps) {
+	//	cout << "snp: " << indel.getString() << " " << indel.getStartHap();
+		const AlignedVariant *av = candidateVariants.findVariant(indel.getStartHap()+leftPos, indel.getType(), indel.getString());
+		if (av==NULL) {
+		//	cout << " not found. " << endl;
+			ll += log(params.priorIndel);
+		} else {
+			double prior = av->getFreq();
+		//	cout << " found: " << prior << " " << av->getStartHap() << " " << av->getFreq() << endl;
+			if (prior<0.0) ll += log(params.priorIndel); else ll+=log(prior);
+		}
+	}
+	/*
+	BOOST_FOREACH(AlignedVariant snp, snps) {
+		const AlignedVariant *av = candidateVariants.findVariant(snp.getStartHap()+leftPos, snp.getString());
+		if (av==NULL) ll += log(params.priorSNP); else {
+			double prior = av->getFreq();
+			if (prior<0.0) ll += log(params.priorSNP); else ll+=log(prior);
+		}
+	}
+	*/
+	/*
+	int numIndels=int(indels.size());
+	int numSNPs=int(snps.size());
+
+	ll+=double(numIndels)*log(params.priorIndel);
+	ll+=double(numSNPs)*log(params.priorSNP);
+	*/
+//	cout << "ll: " << ll << endl;
+	return ll;
+}
+
+#define FILTERHAPS
+#ifdef FILTERHAPS
+
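+// When doFilter is set, haplotypes with an indel variant that is not covered by any read aligning
+// to the haplotype without indels are flagged in 'filtered'. Per variant, the forward/reverse reads
+// covering it are recorded in varCoverage, counting only reads on haplotypes that survive the filter.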
+void DetInDel::filterHaplotypes(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks,  vector<int> & filtered, map<pair<int, AlignedVariant>, VariantCoverage> & varCoverage, bool doFilter)
+{
+
+	const int debugfh = 0;
+	int numFiltered = 0;
+	int numHaps = int(haps.size());
+	filtered = vector<int>(haps.size(),0);
+
+	varCoverage.clear();
+
+
+	typedef pair<int, AlignedVariant> PAV;
+	map<PAV, vector< std::set<int> > > hVarCoverage; // coverage of each variant, recorded per haplotype, so that in the end only reads on haplotypes that were not filtered out contribute to the reported coverage
+
+	for (int h=0;h<int(haps.size());h++) {
+		// check all reads aligned to this haplotype and select the ones that are not off-haplotype and align without indels (the high-quality mismatch cutoff is currently disabled)
+		std::set<int> selReads;
+		for (size_t r=0;r<reads.size();r++) {
+			int sel = 0;
+			if (!liks[h][r].offHapHMQ && liks[h][r].numIndels == 0) { // && liks[h][r].numMismatch<=3) {
+				selReads.insert(int(r));
+				sel = 1;
+			}
+			if (debugfh) cout << "sel: " << "h: " << h << " " << bam1_qname(reads[r].getBam()) << " mpos: " << reads[r].matePos << " selected: " << sel << endl;
+
+		}
+
+		// check each variant in the haplotype
+
+		bool allCovered = true; // all variants in haplotype should be covered by at least one read.
+		for (map<int, AlignedVariant>::const_iterator it = haps[h].indels.begin();it!= haps[h].indels.end();it++) {
+				const AlignedVariant & av = it->second;
+
+				PAV pav(it->first, av);
+
+				map<PAV,vector<std::set<int> > >::iterator pit  = hVarCoverage.find(pav);
+				if (pit == hVarCoverage.end()) {
+					hVarCoverage[pav] = vector< std::set<int> >(haps.size()*2);
+				}
+
+				if (av.getType() == Variant::INS || av.getType() == Variant::DEL) {
+					int left = av.getLeftFlankRead() - params.obsParams.padCover;     // readFlankLeft is the first left unique base flanking the indel
+					int right = av.getRightFlankRead() + params.obsParams.padCover;
+					int leftV = av.getLeftFlankRead();
+					int rightV = av.getRightFlankRead();
+
+
+					int len = right-left+1;
+					bool covered = false;
+					int numdelcovered = 0;
+					//cout << "left: " << left << " right: " << right << " len: " << len << endl;
+					if (av.getType() == Variant::DEL) {
+						// see if there is at least one read spanning the interval with at most maxMismatch mismatches
+						BOOST_FOREACH(int r, selReads) {
+							std::set <int> c;
+							int strand = 0;
+							if (reads[r].isUnmapped()) {
+								if (!reads[r].mateIsReverse()) strand = 1;
+							} else {
+								if (reads[r].isReverse()) strand = 1;
+							}
+
+							int nmm = 0;
+							for (int b=0;b<int(liks[h][r].hpos.size());b++) {	// '<' (not '<=') keeps b within hpos/read bounds
+								int hb = liks[h][r].hpos[b];
+								if (hb>=left && hb<=right) {
+									c.insert(hb);
+									if ( haps[h].seq[hb]!='N' && haps[h].seq[hb]!=reads[r].seq.seq[b]) nmm++;
+								}
+
+							}
+							int cov = 0;
+							if (int(c.size())>= len && nmm<=params.obsParams.maxMismatch) {
+								cov = 1;
+								numdelcovered++;
+								hVarCoverage[pav][h+strand*numHaps].insert(r);
+							}
+							//cout << "RC" << bam1_qname(reads[r].getBam()) << " cov: " << cov << endl;
+						}
+
+						if (numdelcovered>=1) {
+							covered = true;
+						}
+					} else if (av.getType() == Variant::INS) {
+						// see if all bases in the haplotype from left to right are covered by at least one read that matches the haplotype without indels and with at most maxMismatch mismatches
+						vector<int> hapBaseCovered(len, 0);
+						vector<int> thisReadCovered(len, 0);
+						int lenins = av.getSeq().size();
+
+						BOOST_FOREACH(int r, selReads) {
+							for (int x=0;x<len;x++) thisReadCovered[x]=0;
+							int nmm = 0;
+							std::set <int> c;
+							
+							// determine read strand
+							int strand = 0;
+							if (reads[r].isUnmapped()) {
+								if (!reads[r].mateIsReverse()) strand = 1;
+							} else {
+								if (reads[r].isReverse()) strand = 1;
+							}
+
+							for (int b=0;b<int(liks[h][r].hpos.size());b++) {	// '<' (not '<=') keeps b within hpos/read bounds
+								int hb = liks[h][r].hpos[b];
+								if (hb>=left && hb<=right) {
+									// covered even if there is a mismatch
+									thisReadCovered[hb-left]+=1;
+									c.insert(hb);
+									// count number of mismatches
+									if (haps[h].seq[hb]!=reads[r].seq.seq[b]) nmm++;
+								}
+							}
+
+						        bool thisread_covered = false;
+							// for an insertion <= 10 bp, the whole insertion plus padCover must be covered, with at most maxMismatch mismatches, by at least one single read (i.e. not merely covered by several reads together)
+							if ( (lenins>10 && nmm<=params.obsParams.maxMismatch) || (lenins<=10 && int(c.size())>=len && nmm<=params.obsParams.maxMismatch)) {
+								thisread_covered = true;
+								for (size_t x=0;x<thisReadCovered.size();x++) {
+									hapBaseCovered[x] += thisReadCovered[x];
+									if (thisReadCovered[x]==0) {
+										thisread_covered = false;
+									}
+									if (debugfh) cout << " " << hapBaseCovered[x];
+								}
+								if (thisread_covered) {
+									hVarCoverage[pav][h+strand*numHaps].insert(r);
+								}
+							}
+							
+
+							if (0) cout << " hap " << h << " var: " << av.getString() << " len: " << len << " " << bam1_qname(reads[r].getBam()) << " nmm: " << nmm << " c.size(): " << c.size() << " mpos: " << reads[r].matePos << " covered: " << thisread_covered << endl;
+							if (thisread_covered) covered=true;
+
+
+						}
+					}
+
+					if (!covered) {
+						allCovered = false;
+						break;
+					}
+					if (debugfh) cout << "hap" << h << " var: " << av.getString() << " COVERED:" << covered << endl;
+
+				}
+
+		}
+		if (doFilter) {
+			if (!allCovered) {
+				numFiltered++;
+				filtered[h]=1;
+			}
+		}
+		if (debugfh) cout << "Haplotype[" << h << "]: filtered " << filtered[h] << endl;
+
+	}
+	cout << "Filtered " << numFiltered << " haplotypes." << endl;
+	// determine coverage of each variant
+	for (map<PAV, vector <std::set<int> > >::const_iterator it = hVarCoverage.begin();it != hVarCoverage.end(); it++) {
+		const PAV & pav = it->first;
+		std::set<int> rf, rr; // forward and reverse strand reads
+		for (int h=0;h<numHaps;h++) if (filtered[h]!=1) {
+			rf.insert(hVarCoverage[pav][h].begin(), hVarCoverage[pav][h].end());
+			rr.insert(hVarCoverage[pav][h+numHaps].begin(), hVarCoverage[pav][h+numHaps].end());
+		}
+		varCoverage[pav]=VariantCoverage(int(rf.size()), int(rr.size()));
+	}
+
+
+}
+#endif
+
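+// Variational-Bayes EM estimate of haplotype frequencies under a symmetric Dirichlet prior
+// (parameter bayesa0). Depending on 'program', one or more sets of "active" variants are built
+// (all variants, the per-haplotype variant sets, or per-site combinations); EM is run for each
+// set over the haplotypes compatible with it, and the sets are combined by their evidence
+// (likelihood times prior) to obtain per-variant posteriors, marginal haplotype frequencies and,
+// optionally, per-sample GLF output.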
+void DetInDel::estimateHaplotypeFrequenciesBayesEM(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<double> & hapFreqs, vector <HapEstResult > & posteriors,  uint32_t candPos, uint32_t leftPos,   uint32_t rightPos, OutputData & glfData, int index, const AlignedCandidates & candidateVariants, string program="all")
+
+{
+
+	// estimate haplotype frequencies using EM
+	hapFreqs.clear();
+
+	size_t nh=haps.size();
+	size_t nr=reads.size();
+
+	vector<double> rl(nh*nr,0.0); // read given haplotype likelihoods
+
+	vector<double> z(nh*nr,0.0); // expectations of read-haplotype indicator variables
+	vector<double> pi(nh); // haplotype frequencies
+	vector<double> lpi(nh); // expectation of log frequencies
+	vector<double> nk(nh,0.0), ak(nh,0.0); // counts for each haplotype
+
+	hapFreqs=nk;
+
+	int numUnmappedRealigned=0;
+	int idx=0;
+	int numReadOffAllHaps=0;
+	for (size_t r=0;r<nr;r++) {
+		int offallhap=1;
+		for (size_t h=0;h<nh;h++) {
+			// initialize read-haplotype likelihoods
+			rl[idx]=liks[h][r].ll;
+			if (!liks[h][r].offHap) offallhap=0;
+			idx++;
+		}
+		if (offallhap) {
+			numReadOffAllHaps++;
+
+		}else {
+			if (reads[r].isUnmapped()) numUnmappedRealigned++;
+		}
+	}
+
+
+	// filter reads
+
+	vector<int> filtered(nh, 0);
+	map<pair<int, AlignedVariant>, VariantCoverage> varCoverage;
+
+	//if (params.filterHaplotypes) {
+	cout << "ALWAYS CALLING ::filterHaplotypes" << endl;
+	filterHaplotypes(haps, reads,liks, filtered, varCoverage, params.filterHaplotypes);
+	//}
+
+
+	typedef pair<int, AlignedVariant> PAV;
+
+	std::set< PAV > allVariants;
+	map<int, std::set<PAV> > allVariantsByPos; //
+
+	typedef map<int, AlignedVariant>::const_iterator It;
+	typedef map<int, std::set<PAV> >::const_iterator PIt;
+
+	for (size_t th=0;th<nh;th++) {
+		const Haplotype & hap=haps[th];
+		for (It it=hap.indels.begin();it!=hap.indels.end();it++) {
+			if (!it->second.isRef() && !(it->second.isSNP() && it->second.getString()[3]=='D')) {
+				allVariants.insert(PAV(it->first,it->second));
+				allVariantsByPos[it->first].insert(PAV(it->first,it->second));
+			}
+		}
+	}
+
+	// set active variants, and divide into snps and indels
+	vector< std::set< PAV > >  activeVariants, activeSNPs, activeIndels;
+
+	int nav=0;
+	int PRID=-1;
+	if (program=="all") {
+		std::set<PAV> snps, indels;
+		BOOST_FOREACH(PAV pav, allVariants) {
+			if (pav.second.isSNP()) {
+				snps.insert(pav);
+			} else if (pav.second.isIndel()) {
+				indels.insert(pav);
+			}
+		}
+
+		// both (double prior)
+		activeVariants.push_back(allVariants);
+		activeIndels.push_back(indels);
+		activeSNPs.push_back(snps);
+		nav++;
+		PRID=1;
+	} else if (program=="singlevariant") {
+		std::set < std::set<PAV> > ssPAV;
+		for (size_t h=0;h<haps.size();h++) if (filtered[h]==0) {
+			const Haplotype & hap=haps[h];
+
+
+			//cout << "hap[" << h << "].seq: " << hap.seq << endl;
+
+			//cout << "vars:";
+			std::set<PAV> act;
+			for (It it=hap.indels.begin();it!=hap.indels.end();it++) {
+				if (!it->second.isRef() && !(it->second.isSNP() && it->second.getString()[3]=='D')) {
+					act.insert(PAV(it->first, it->second));
+			//		cout << "[" << it->first << "," << it->second.getString() << "]";
+				}
+			}
+			//cout << endl;
+			ssPAV.insert(act);
+		}
+
+		nav=0;
+		BOOST_FOREACH(std::set<PAV> s, ssPAV) {
+			std::set<PAV> snps, indels;
+			BOOST_FOREACH(PAV pav, s) {
+				if (pav.second.isSNP()) {
+					snps.insert(pav);
+				} else if (pav.second.isIndel()) {
+					indels.insert(pav);
+				}
+			}
+
+			// both (double prior)
+			activeVariants.push_back(s);
+			activeIndels.push_back(indels);
+			activeSNPs.push_back(snps);
+			nav++;
+		}
+
+		PRID=2;
+	} else if (program == "priorpersite") {
+		nav = 0;
+
+		// add reference haplotype
+		activeVariants.push_back(std::set<PAV>());
+		activeIndels.push_back(std::set<PAV>());
+		activeSNPs.push_back(std::set<PAV>());
+
+
+
+		for (map<int, std::set<PAV> >::const_iterator site_it = allVariantsByPos.begin();site_it!=allVariantsByPos.end();site_it++) {
+			std::set<PAV> snps, indels;
+			BOOST_FOREACH(PAV pav, site_it->second) {
+				if (pav.second.isSNP()) snps.insert(pav);
+				else if (pav.second.isIndel()) indels.insert(pav);
+			}
+
+			int maxStateSnp = (snps.size()==0)?1:2;
+			int maxStateIndel = (indels.size()==0)?1:2;
+
+			int prevNumActive = activeVariants.size();
+			int sSnp = 1, sIndel = 1;
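+			// note: the enumeration over SNP/indel on-off states below is commented out, so with
+			// sSnp = sIndel = 1 every previously built configuration is extended with both the SNPs
+			// and the indels at this site (the un-extended configurations are kept as well).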
+			//for (int sSnp = 0;sSnp<maxStateSnp;sSnp++) {
+			//	for (int sIndel = 0; sIndel < maxStateIndel; sIndel++) {
+			//
+			//		if (sSnp == 1 || sIndel == 1) {
+						// extend previous activeVariants
+
+						for (int pna = 0;pna<prevNumActive;pna++) {
+							std::set<PAV> av = activeVariants[pna];
+							std::set<PAV> aIndels = activeIndels[pna];
+							std::set<PAV> aSNPs = activeSNPs[pna];
+							if (sSnp == 1) {
+								av.insert(snps.begin(),snps.end());
+								aSNPs.insert(snps.begin(),snps.end());
+							}
+							if (sIndel == 1) {
+								av.insert(indels.begin(),indels.end());
+								aIndels.insert(indels.begin(),indels.end());
+							}
+
+							activeVariants.push_back(av);
+							activeIndels.push_back(aIndels);
+							activeSNPs.push_back(aSNPs);
+						}
+
+			//		}
+		//		}
+		//	}
+
+		}
+		nav = activeVariants.size();
+
+		PRID = 3;
+
+	} else {
+		cerr << "Unknown EM option" << endl;
+		exit(1);
+	}
+
+	vector<int> compatible(nh,0);
+	vector<double> logliks(nav,0.0);
+	vector<double> logpriors(nav, 0.0);
+	vector<double> post(nav,0.0);
+	vector<double> freqs(nav*nh,0.0);
+
+	// create matrix of which variant is active in which set
+	idx=0;
+	int nv=int(allVariants.size());
+	vector<int> active(nav*nv,0), hapHasVar(nh*nv,0);
+
+	//cout << "active: " << endl;
+	BOOST_FOREACH(PAV pav, allVariants) {
+	//	 cout << pav.first << " " << pav.second.getString() << " ";
+		for (int a=0;a<nav;a++) {
+			if (activeVariants[a].find(pav)!=activeVariants[a].end()) active[a*nv+idx]=1;
+	//		cout << " " << active[a*nv+idx];
+		}
+	//	 cout << endl;
+		for (size_t h=0;h<nh;h++) {
+			It it=haps[h].indels.find(pav.first);
+			if (it!=haps[h].indels.end() && it->second.getString()==pav.second.getString()) hapHasVar[h*nv+idx]=1;
+//			cout << "[" <<  active[h*nv+idx] << " " << hapHasVar[h*nv+idx]<< " ]";
+
+		}
+//		cout << endl;
+		idx++;
+	}
+
+	/*
+	cout << "allVariants: ";
+	BOOST_FOREACH(PAV pav, allVariants) {
+		cout << " [" << pav.first << " " << pav.second.getString() << "]";
+	}
+	cout << endl;
+	*/
+
+
+
+
+
+	double logz=-HUGE_VAL;
+
+	double a0=params.bayesa0;
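+	// a0 is the symmetric Dirichlet pseudo-count on the haplotype frequencies; logz accumulates
+	// the log evidence, i.e. the log of the sum over all active-variant configurations of
+	// exp(loglik + logprior), and is later used to normalize the configuration posteriors.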
+
+	for (int th=0;th<nav;th++) {
+
+		// set active variants
+
+		double logprior=0.0;
+
+		map <int, int> sites;
+		BOOST_FOREACH(PAV pav, activeSNPs[th]) {
+			sites[pav.first]=1;
+
+			const AlignedVariant & avar = pav.second;
+			const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+			int type = pav.second.getType();
+
+			double lnf = 0.0;
+
+			if (type==Variant::SNP) lnf = log(params.priorSNP); else if (type==Variant::DEL || type==Variant::INS) lnf = log(params.priorIndel);
+			if (av==NULL) {
+				logprior += lnf;
+			} else {
+				double prior = av->getFreq();
+				if (prior<0.0) logprior += lnf; else logprior+=log(prior);
+			}
+
+		}
+		BOOST_FOREACH(PAV pav, activeIndels[th]) {
+
+			const AlignedVariant & avar = pav.second;
+			const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+			int type = pav.second.getType();
+
+			double lnf = 0.0;
+
+			if (type==Variant::SNP) lnf = log(params.priorSNP); else if (type==Variant::DEL || type==Variant::INS) lnf = log(params.priorIndel);
+			if (av==NULL) {
+				logprior += lnf;
+			} else {
+				double prior = av->getFreq();
+				if (prior<0.0) logprior += lnf; else logprior+=log(prior);
+			}
+
+			sites[pav.first]=2;
+		}
+
+
+		/*
+		for (map<int,int>::const_iterator it=sites.begin();it!=sites.end();it++) {
+			if (it->second==2) logprior+=log(params.priorIndel); else if (it->second==1) logprior+=log(params.priorSNP);
+		}
+		*/
+
+		logpriors[th]=logprior;
+
+//		cout << "Number of indels: " << ni << " number of SNPs: " << ns << endl;
+
+		// check haplotypes
+
+		int numah=0; // number of haplotypes for which frequencies will be estimated
+		for (size_t h=0;h<nh;h++) {
+			compatible[h]=1;
+			if (filtered[h]!=0) {
+				compatible[h]=0;
+			} else {
+				for (It it=haps[h].indels.begin();it!=haps[h].indels.end();it++) {
+					if (!it->second.isRef() && !(it->second.isSNP() && it->second.getString()[3]=='D') && activeVariants[th].find(PAV(it->first,it->second))==activeVariants[th].end()) {
+						// haplotype h has a non-ref variant that is not one of the active variants
+						compatible[h]=0;
+						break;
+					}
+				}
+			}
+			if (compatible[h]) numah++;
+		}
+
+
+		// run EM for this set of active variants
+		bool converged=false;
+		double tol=params.EMtol;
+
+		double eOld=-HUGE_VAL, eNew;
+
+		// initialize frequencies
+		for (size_t h=0;h<nh;h++) if (compatible[h]) lpi[h]=log(1.0/double(numah)); else lpi[h]=-100;
+
+		/*
+		for (size_t r=0;r<nr;r++) {
+				cout << "rl[" << r << "]:";
+				for (size_t h=0;h<nh;h++) {
+					cout << " " << rl[r*nh+h];
+				}
+				cout << endl;
+			}
+		*/
+		double loglik, llNew, llOld=-HUGE_VAL;
+		int iter=0;
+		while (!converged) {
+			//cout << endl << "EM[" << iter << "]:" << endl;
+			// compute expectation of indicator variables
+			for (size_t h=0;h<nh;h++) nk[h]=0.0;
+
+			loglik=0.0;
+
+			int idx=0;
+			for (size_t r=0;r<nr;r++) {
+				double lognorm=-HUGE_VAL;
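+				// E-step: z[idx] = E[log pi_h] + log P(read r | hap h); addLogs is a log-sum-exp,
+				// so lognorm ends up as the log of the per-read normalization constant.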
+				// compute responsibilities
+				for (size_t h=0;h<nh;h++) {
+					z[idx]=lpi[h]+(rl[idx]);
+					lognorm=addLogs(lognorm, z[idx]);
+					idx++;
+				}
+				idx-=nh;
+				// normalize and compute counts
+				for (size_t h=0;h<nh;h++) {
+					z[idx]-=lognorm;
+					z[idx]=exp(z[idx]);
+					nk[h]+=z[idx];
+					idx++;
+				}
+				loglik+=lognorm;
+
+			}
+			// compute frequencies
+			//cout << "pi: ";
+
+			double ahat=0.0;
+			for (size_t h=0;h<nh;h++) if (compatible[h]) {
+					ak[h]=nk[h]+a0; // a0 is Dirichlet prior parameter
+					ahat+=ak[h];
+			}
+			double dahat=boost::math::digamma(ahat);
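+			// variational-Bayes update for a Dirichlet posterior Dir(ak): E[log pi_h] = digamma(ak[h]) - digamma(sum_k ak[k]);
+			// pi[h] below stores the log of the posterior-mean frequency estimate (a0+nk[h])/(numah*a0+nr).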
+
+			for (size_t h=0;h<nh;h++) {
+
+				// do variational bayes
+				if (compatible[h]) {
+					lpi[h]=boost::math::digamma(ak[h])-dahat;
+					pi[h]=log((a0+nk[h])/(double(numah)*a0+double(nr)));
+				} else {
+					lpi[h]=-100;
+					pi[h]=-100;
+				}
+			//	cout << " " << pi[h];
+			//	zp+=exp(pi[h]);
+			}
+			//cout << " zp: " << zp << endl;
+
+
+			idx=0;
+			eNew=0.0;
+			for (size_t r=0;r<nr;r++) {
+				for (size_t h=0;h<nh;h++) {
+				// compute responsibilities
+					eNew+=z[idx]*( pi[h]+rl[idx]);
+					idx++;
+				}
+			}
+			//cout << "eOld: " << eOld << " " << " eNew: " << eNew; for (size_t h=0;h<nh;h++) { cout << " " << pi[h]; } cout << endl;
+			/*
+			for (size_t r=0;r<nr;r++) {
+				cout << "z[" << r << "]:";
+				for (size_t h=0;h<nh;h++) {
+					cout << " " << z[r*nh+h];
+				}
+				cout << endl;
+			}
+			*/
+			llNew=loglik;
+			//cout << "loglik: " << loglik << endl;
+			if (0 && llOld>llNew+1e-5)  {
+				cerr << "ERROR: nr: " << nr << " eOld: " << eOld << " eNew: " << eNew << " diff: " << eOld-eNew << endl;
+				cout << "ERROR: nr: " << nr << " eOld: " << eOld << " eNew: " << eNew << " diff: " << eOld-eNew << endl;
+				cerr << "ERROR: nr: " << nr << " llOld: " << llOld << " llNew: " << llNew << " diff: " << llOld-llNew << endl;
+				cout << "ERROR: nr: " << nr << " llOld: " << llOld << " llNew: " << llNew << " diff: " << llOld-llNew << endl;
+
+				//throw string("EM Error in estimateHapFreqs");
+				//iter=100;
+
+			}
+			converged=(fabs(eOld-eNew))<tol || iter>25;
+			//cout << "iter: " << iter << " eOld: " << eOld << " eNew: " << eNew << endl;
+
+			eOld=eNew;
+			llOld=llNew;
+			iter++;
+
+
+		}
+
+
+		// check sum
+
+		double zsum=0.0;
+		for (size_t y=0;y<nh;y++) {
+			zsum+=exp(pi[y]);
+		}
+
+		if (0) {
+		cout << "th: " << th << endl;
+		for (size_t y=0;y<nh;y++) {
+			cout << "[" << y << "," << exp(pi[y]) << "]";
+		}
+		cout << endl << endl;
+		}
+
+		logliks[th]=loglik;
+		logz=addLogs(logz, logliks[th]+logprior);
+		for (size_t h=0;h<nh;h++) { freqs[th*nh+h]=exp(pi[h])/zsum; }
+		//for (size_t h=0;h<nh;h++) { cout << " " << freqs[th*nh+h]; } cout << endl;
+
+		//cout << "loglik: " << loglik << " " << logliks[th] << " logprior: " << logprior << endl << endl;
+
+	}
+
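+	// posterior probability of each active-variant configuration: P(config | reads) = exp(loglik + logprior - logz)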
+	for (int a=0;a<nav;a++) {
+		post[a]=exp(logliks[a]+logpriors[a]-logz);
+		//cout << "post[" << a << "]: " << post[a] << endl;
+	}
+
+	for (size_t h=0;h<nh;h++) {
+		hapFreqs[h]=0.0;
+	}
+
+	for (int th=0;th<nav;th++) for (size_t h=0;h<nh;h++) {
+
+		hapFreqs[h]+=exp(logliks[th]+logpriors[th]-logz)*freqs[th*nh+h];
+	}
+
+	/*
+	cout << "hapFreqs:\n ";
+	for (size_t th=0;th<nh;th++) {
+		cout << "hapFreq[" << th << "]: " << hapFreqs[th] << endl;
+		cout << "H" << th << ": " << logliks[th] << " " << logpriors[th] << " ";
+		for (size_t h=0;h<nh;h++) {
+			cout << " " << freqs[th*nh+h];
+		}
+		cout << endl;
+	}
+	*/
+
+	cout << endl;
+
+	// compute marginal posteriors for the individual variants
+	vector< std::set<int> > readidx(myBams.size());
+	for (int r=0;r<int(nr);r++) readidx[reads[r].poolID].insert(r);
+
+	vector<double> prior(nh*nh,0.0);
+
+	idx=-1;
+	BOOST_FOREACH(PAV pav, allVariants) {
+		idx++;
+
+		double logp=-HUGE_VAL;
+		double freq=0.0;
+		for (int th=0;th<nav;th++) {
+			if (active[th*nv+idx]) {
+				logp=addLogs(logp, logliks[th]+logpriors[th]);
+			}
+		}
+
+		for (size_t h=0;h<nh;h++) {
+			if (hapHasVar[h*nv+idx]) {
+				freq+=hapFreqs[h];
+			}
+		}
+
+		logp-=logz;
+
+		// compute marginalized haplotype frequencies
+		vector<double> prior(nh*nh,0.0);
+
+		bool doGLF=false; //(candPos==leftPos+pav.first)?true:false;
+
+		const AlignedVariant & avar = pav.second;
+		const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+		doGLF = (av==NULL)?false:true;
+
+		//change this
+if (params.outputGLF && doGLF) {
+		map<int,int> otn; // old to new haplotype index
+		// vector for mapping old haplotype index to new haplotype index
+		vector<int> marsum(nv, 0);
+		int s=1;
+		for (int y=0;y<nv;y++) {
+			if (y!=idx) {
+				marsum[y]=s;
+				s*=2;
+			} else marsum[y]=0;
+		//	cout << "marsum[" << y << "]: " << marsum[y] << endl;
+		}
+
+
+		// make a map of new marginalized states to the corresponding new index in the haplotype arrays.
+		map<int, int> mar_states;
+		for (int h=0;h<int(nh);h++) {
+			int nidx=0;
+			for (int v=0;v<nv;v++) nidx+=marsum[v]*hapHasVar[h*nv+v];
+			map<int,int>::iterator it=mar_states.find(nidx);
+			if (it!=mar_states.end()) otn[h]=it->second; else {
+				int ns=int(mar_states.size());
+				mar_states[nidx]=ns;
+				otn[h]=ns;
+			}
+		//	cout << "oth["<<h<<"]: " << otn[h] << endl;
+		}
+
+		int nmarhap=mar_states.size();
+
+		vector<double> marFreqs(nmarhap,0.0); //marginal frequencies
+		for (size_t h=0;h<nh;h++) {
+			int newh=otn[h];
+			marFreqs[newh]+=hapFreqs[h];
+		}
+		// convert back to conditional frequencies/probabilities
+		for (int h=0;h<nmarhap;h++) {
+			if (marFreqs[h]<1e-16) marFreqs[h]=-50; else marFreqs[h]=log(marFreqs[h]);
+		}
+		//	cout << "marFreq[]: "; for (size_t x=0;x<nmarhap;x++) cout << " " << marFreqs[x]; cout << endl;
+
+		// compute for every pair of haplotypes the likelihood of drawing it given priors on the variants and the estimated frequencies
+		for (size_t h1=0;h1<nh;h1++) {
+			for (size_t h2=h1;h2<nh;h2++) {
+				prior[h1*nh+h2]=marFreqs[otn[h1]]+marFreqs[otn[h2]];
+		//		cout << "prior: " << h1 << " " << h2 << ": " << prior[h1*nh+h2] << endl;
+			}
+		}
+}
+		// count how many reads support a haplotype carrying this variant
+
+		std::set<int> support;
+		int totnf=0, totnr=0;
+		double log5=log(0.5);
+		if (1) for (size_t b=0;b<myBams.size();b++) {
+			double msq=0.0;
+
+			int nf=0;
+			int nr=0;
+			vector<double> lik(3,0.0);
+
+			if (readidx[b].size()) {
+				// compute RMS of mapping qualities
+
+				int pos=pav.first;
+				msq=0.0;
+				int n=0;
+				if (params.outputGLF && doGLF) {
+				for (int i=0;i<3;i++) lik[i]=-HUGE_VAL;
+				for (size_t h1=0;h1<nh;h1++) for (size_t h2=h1;h2<nh;h2++) {
+					int genotype=hapHasVar[h1*nv+idx]+hapHasVar[h2*nv+idx];
+					double pr=prior[h1*nh+h2];
+				//	cout << "prior: " << h1 << " " << h2 << ": " << prior[h1*nh+h2];
+					double ll=pr;
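+					// diploid read likelihood: P(r | h1,h2) = 0.5*P(r|h1) + 0.5*P(r|h2),
+					// accumulated in log space as log5 + addLogs(log P(r|h1), log P(r|h2))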
+					BOOST_FOREACH(int r, readidx[b]) {
+						ll+=log5+addLogs(rl[r*nh+h1],rl[r*nh+h2]);
+					}
+
+					lik[genotype]=addLogs(lik[genotype],ll);
+				//	cout << " ll: " << ll << " " << lik[genotype] << endl;
+				}
+				}
+
+				BOOST_FOREACH(int r, readidx[b]) {
+					//cout << " " << 10*log10(1-reads[r].mapQual);
+
+					size_t mlidx; double ml=-HUGE_VAL;
+					std::set<size_t> mlis;
+					for (size_t hi=0;hi<nh;hi++) {
+						if (liks[hi][r].ll>=ml) {
+							mlidx=hi;
+							ml=liks[hi][r].ll;
+						}
+					}
+					for (size_t hi=0;hi<nh;hi++) {
+						if (liks[hi][r].ll>=ml-1e-7) {
+							mlis.insert(hi);
+						}
+					}
+					bool nrt=false, nft=false;
+
+					map<int,bool>::const_iterator it;
+					BOOST_FOREACH(size_t h, mlis) {
+						bool covered=false;
+						if (pav.second.isIndel()) {
+							it=liks[h][r].hapIndelCovered.find(pos);
+							if (it!=liks[h][r].hapIndelCovered.end() && it->second) covered=true;
+						} else if (pav.second.isSNP()) {
+							it=liks[h][r].hapSNPCovered.find(pos);
+							if (it!=liks[h][r].hapSNPCovered.end() && it->second) covered=true;
+						}
+						if (covered) { // hapHasVar[] is to check whether the haplotype truly has the variant or only the reference
+							if (hapHasVar[h*nv+idx]) {
+								//if (pav.first+leftPos==43017596) {
+								//	cout << "h: " << h << " r: " << r << " read: " << reads[r] << endl;
+								//}
+
+
+								if  (reads[r].onReverseStrand) nrt=true; else nft=true;
+							}
+
+						}
+					}
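+					// mapQual appears to be stored as the probability that the mapping is correct,
+					// so mq below recovers the phred-scaled mapping quality; the squared values are
+					// accumulated to report the root-mean-square (msq) over the reads.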
+					double mq=-10*log10(1.0-reads[r].mapQual);
+					msq+=mq*mq;
+					n++;
+					if (nft) nf++;
+					if (nrt) nr++;
+				} // foreach read r
+			//	cout << endl;
+				if (n!=0) msq=sqrt(msq/double(n)); else msq=0.0;
+				if (nf+nr>0) support.insert(b);
+				totnf+=nf;
+				totnr+=nr;
+			}  // readidx[b].size()
+
+			if (params.outputGLF && doGLF) {
+				OutputData::Line line(glfData);
+				line.set("msg", "ok");
+				line.set("index", index);
+				line.set("tid", params.tid);
+				line.set("analysis_type",program);
+				line.set("indidx",b);
+				line.set("was_candidate_in_window",1);
+				line.set("lpos",leftPos);
+				line.set("rpos",rightPos);
+				line.set("center_position",candPos);
+				line.set("realigned_position",pav.first+leftPos);
+				line.set("post_prob_variant", exp(logp));
+				line.set("est_freq", freq);
+				line.set("logZ", logz);
+				line.set("nref_all", pav.second.getString());
+				line.set("num_reads", readidx[b].size());
+				line.set("msq",msq);
+				line.set("num_cover_forward", nf);
+				line.set("num_cover_reverse", nr);
+				line.set("num_unmapped_realigned", numUnmappedRealigned);
+				line.set("var_coverage_forward", varCoverage[pav].nf);
+				line.set("var_coverage_reverse", varCoverage[pav].nr);
+
+				if (b==0) {
+					// output haplotypes and frequencies
+
+					ostringstream os;
+					bool ifh = true;
+					for (size_t h=0;h<haps.size();h++) {
+						if (hapFreqs[h]>1.0/double(2*reads.size())) {
+							bool isfirst = true;
+							if (!ifh) os << ";";
+							ifh = false;
+							int nvars = 0;
+							for (map<int, AlignedVariant>::const_iterator it=haps[h].indels.begin();it!=haps[h].indels.end();it++) if (it->second.getString()!="*REF") {
+								nvars++;
+								if (!isfirst) os << ",";
+								isfirst = false;
+								os << leftPos+it->first << "," << it->second.getString();
+							}
+							if (nvars==0) os << "REF";
+							os << ":" << hapFreqs[h];
+						}
+					}
+					line.set("hapfreqs", os.str());
+
+				}
+
+				string likstring;
+
+				for (int n=0;n<3;n++) {
+					ostringstream o;
+					o << lik[n];
+					if (n==0) likstring.append("0/0:"); else if (n==1) likstring.append("0/1:"); else likstring.append("1/1:");
+					likstring.append(o.str());
+					if (n<2) likstring.append(";");
+				}
+				line.set("glf",likstring);
+				glfData.output(line);
+			}
+			// glfOutput <<  PRID << " "  << b << " " << params.tid << " " << candPos << " " << pav.first+leftPos << " " << pav.second.getString() << " " << reads.size() <<  " " << msq << " "  << nf << " " << nr << " " << lik[0] << " " << lik[1] << " " << lik[2] << endl;
+
+
+		} // foreach b
+		posteriors.push_back(HapEstResult(pav.second, pav.first,exp(logp),freq, totnf, totnr));
+	}
+
+
+	cout << "candPos: " << candPos << " numReadOffAllHaps: " << numReadOffAllHaps << " logz: " << logz << endl;
+
+
+
+    if (params.outputPooledLikelihoods) {
+
+            // output which variants are active in which haplotype
+
+
+            stringstream os; os << params.fileName << "." << params.tid << "." << candPos;
+	    string oprefix = os.str();
+
+	    string fname = oprefix;
+	    fname.append(".hapvars");
+            ofstream of(fname.c_str());
+            if (!of.is_open()) {
+                    throw string("Cannot open file ").append(fname).append(" for writing .hapvars file");
+            }
+
+            idx=0;
+            BOOST_FOREACH(PAV pav, allVariants) {
+                    stringstream o;
+                    o << params.tid << " " << leftPos+pav.first << " " << pav.second.getString();
+                    of << o.str() << string(50-o.str().length(),' ');
+                    for (size_t h=0;h<nh;h++) {
+                            of << " " << hapHasVar[h*nv+idx];
+                    }
+
+                    of << endl;
+                    idx++;
+
+            }
+
+            of.close();
+
+
+	    string prefix;
+            stringstream os5; os5 << "EM " << params.tid << " " << candPos << " " << reads.size(); prefix.append(os5.str());
+
+	    fname = oprefix;
+	    fname.append(".hapfreqs");
+	    of.open(fname.c_str());
+	    outputHapsAndFreqs(&of,prefix,haps,hapFreqs, leftPos);
+	    of.close();
+
+            fname.clear();
+            fname=oprefix;
+	    fname.append(".liks");
+
+	    cout << "fname: " << fname << endl;
+            of.open(fname.c_str());
+            if (!of.is_open()) {
+                    throw string("Cannot open file ").append(fname).append(" for writing .liks file");
+            }
+
+
+
+            // output all likelihoods
+
+            for (size_t r=0;r<nr;r++) {
+
+                    of << r << " " << bam1_qname(reads[r].getBam()) << " " << log(1.0-reads[r].mapQual) << " " << reads[r].poolID;
+
+
+                    for (size_t h=0;h<nh;h++) {
+                            of << " " << liks[h][r].ll;
+                    }
+
+
+                    for (size_t h=0;h<nh;h++) {
+                            of << " " << liks[h][r].offHap;
+                    }
+                    of << endl;
+            }
+	    of.close();
+
+	    // output alignments
+	    fname = oprefix;
+	    fname.append(".alignments");
+            cout << "fname: " << fname << endl;
+            of.open(fname.c_str());
+            if (!of.is_open()) {
+                    throw string("Cannot open file ").append(fname).append(" for writing .alignments file");
+            }
+
+
+
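+	    // note: the alignment dump below appears to go to stdout (cout / printAlignment), while the
+	    // .alignments stream opened above is closed without being written to.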
+	    for (size_t r=0;r<nr;r++) {
+		cout << "###" << endl;
+		cout << "read: " << bam1_qname(reads[r].getBam()) << " mpos: " << reads[r].matePos << endl;
+		cout << "isUnmapped: " << reads[r].isUnmapped() << endl;
+		// compute maximum alignment likelihood
+		double lq = 0.0;
+		for (size_t b=0;b<reads[r].size();b++) lq += log(reads[r].qual[b]);
+
+		cout << "Max alignment loglik: " << lq << endl;
+
+		double maxll = -HUGE_VAL;
+		std::set <int> mlhaps;
+		for (int h=nh-1;h>=0;h--) if (liks[h][r].ll>maxll) { maxll = liks[h][r].ll; }
+		for (int h=nh-1;h>=0;h--) mlhaps.insert(h); //if (fabs(liks[h][r]-maxll)<0.01) mlhaps.insert(h);
+		BOOST_FOREACH(int hidx, mlhaps) {
+			cout << "r: " << r << " hidx: " << hidx << " maxll:" << maxll << endl;
+			ObservationModelFBMaxErr obs(haps[hidx], reads[r], leftPos, params.obsParams);
+			cout << string(50,' ') << haps[hidx].seq << endl;
+			obs.printAlignment(50);
+		}
+	    }
+
+            of.close();
+    }
+}
+
+#ifndef DIPLOIDGLF
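+// Diploid genotyping: every pair of unfiltered haplotypes is scored by its diploid read likelihood
+// plus the haplotype-pair prior; indel calls are made from the MAP pair that contains a candidate
+// indel (compared against the best pair without one), and per-site genotype likelihoods are then
+// computed by marginalizing over haplotype pairs.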
+void DetInDel::diploidGLF(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<double> & hapFreqs, vector <HapEstResult > & posteriors,  uint32_t candPos, uint32_t leftPos, uint32_t rightPos,  OutputData & glfData, int index, const AlignedCandidates & candidateVariants, string program="all")
+
+{
+	size_t nh=haps.size();
+	size_t nr=reads.size();
+
+	vector<int> filtered(nh, 0);
+	map<pair<int, AlignedVariant>, VariantCoverage> varCoverage;
+	filterHaplotypes(haps, reads, liks, filtered, varCoverage, params.filterHaplotypes);
+
+	vector<double> rl(nh*nr,0.0); // read given haplotype likelihoods
+
+	// get read-haplotype likelihoods and count the number of reads that are off all haplotypes.
+
+	int idx=0;
+	int numReadOffAllHaps=0;
+	for (size_t r=0;r<nr;r++) {
+		int no=1;
+		for (size_t h=0;h<nh;h++) {
+			// initialize read-haplotype likelihoods
+			rl[idx]=liks[h][r].ll;
+			if (!liks[h][r].offHap) no=0;
+			idx++;
+		}
+		if (no) {
+			numReadOffAllHaps++;
+		}
+	}
+
+	// get all variants
+
+	typedef pair<int, AlignedVariant> PAV;
+	const int VARSNP=1;
+	const int VARINDEL=2;
+
+
+	std::set< PAV > allVariants;
+	map<int, std::set<PAV> > allVariantsByPos; //
+
+
+	typedef map<int, AlignedVariant>::const_iterator It;
+	typedef map<int, std::set<PAV> >::const_iterator PIt;
+
+	vector<int> hap_num_indels(nh, 0);
+	vector<int> hap_num_candidate_indels(nh, 0);
+	vector<int> hap_num_snps(nh, 0);
+
+	int ref_hap_idx = -1;
+	for (size_t th=0;th<nh;th++) {
+		const Haplotype & hap=haps[th];
+		//cout << "hap[" << th << "]: ";
+		hap_num_indels[th] = hap.countIndels();
+		hap_num_snps[th] = hap.countSNPs();
+
+		if (hap_num_indels[th] == 0 && hap_num_snps[th] == 0) {
+			//if (ref_hap_idx != -1) cout << string("Already have ref-hap!") << " " << ref_hap_idx << endl;
+			//if (ref_hap_idx!=-1) cout << "RH: " << haps[ref_hap_idx].seq << endl;
+			//cout << "TH: " << haps[th].seq << endl;
+			ref_hap_idx = th;
+			//int h1 = th;
+			//cout << "RRRR IN: " << hap_num_indels[h1] << " SNP: " << hap_num_snps[h1]; cout << " H1:"; for (It it=haps[h1].indels.begin();it!=haps[h1].indels.end();it++) cout << it->first << "," << it->second.getString() << ";" << endl;
+
+		}
+		if (hap_num_indels[th] != 0) {
+			//check how many were candidates in the input file
+			int nc = 0;
+			for (It it=hap.indels.begin();it!=hap.indels.end();it++) {
+				const AlignedVariant & avar = it->second;
+				const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+				if (av!=NULL) nc+=1;
+			}
+			hap_num_candidate_indels[th] = nc;
+		}
+	
+
+
+		for (It it=hap.indels.begin();it!=hap.indels.end();it++) {
+			if (!it->second.isRef() && !(it->second.isSNP() && it->second.getString()[3]=='D')) {
+				//cout << " " << it->first << "," << it->second.getString();
+				allVariants.insert(PAV(it->first,it->second));
+				allVariantsByPos[it->first].insert(PAV(it->first,it->second));
+			}
+		}
+		//cout << endl;
+	}
+
+	idx=0;
+	map<int,int> posToPosIdx;
+	vector<int> varPositions;
+	for (PIt pit=allVariantsByPos.begin();pit!=allVariantsByPos.end(); pit++) {
+		posToPosIdx[pit->first]=idx;
+		varPositions.push_back(pit->first);
+		idx++;
+	}
+
+	int numVarPos = allVariantsByPos.size();
+	int nv=int(allVariants.size());
+
+	vector<int> hapVar(nh*numVarPos,0);
+	vector<int> varType(nv+1);
+	vector<PAV> variants(nv+1);
+	idx=1;
+
+
+	BOOST_FOREACH(PAV pav, allVariants) {
+		int type=VARSNP;
+		if (pav.second.isIndel()) type=VARINDEL;
+		varType[idx]=type;
+		int posIdx=posToPosIdx[pav.first];
+		for (size_t h=0;h<nh;h++) {
+			It it=haps[h].indels.find(pav.first);
+			if (it!=haps[h].indels.end() && it->second.getString()==pav.second.getString()) hapVar[h*numVarPos+posIdx]=idx;
+		}
+		variants[idx]=pav;
+
+
+
+		idx++;
+	}
+
+
+
+	/*
+	cout << "allVariants: ";
+	BOOST_FOREACH(PAV pav, allVariants) {
+		cout << " [" << pav.first << " " << pav.second.getString() << "]";
+	}
+	cout << endl;
+	*/
+
+	// count how many reads support a haplotype carrying this variant
+
+	std::set<int> readidx;
+	for (size_t r=0;r<nr;r++) readidx.insert(r);
+
+
+
+	// compute marginal posteriors for the individual variants
+	vector<double> prior(nh*nh,0.0), pairs_posterior(nh*nh, 0);
+	// compute for every pair of haplotypes the likelihood of drawing it given priors on the variants and the estimated frequencies
+	for (size_t h1=0;h1<nh;h1++) {
+		for (size_t h2=h1;h2<nh;h2++) {
+			prior[h1*nh+h2]=getHaplotypePrior(haps[h1],haps[h2], leftPos, candidateVariants);
+		}
+	}
+
+
+	vector<int> max_indel_pair(2,-1);
+	vector<int> max_noindel_pair(2,-1);
+
+	double max_ll_indel = -HUGE_VAL;
+	double max_ll_noindel = -HUGE_VAL;
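+	// enumerate all pairs of unfiltered haplotypes, score each pair by diploid likelihood + prior,
+	// and keep the best-scoring pair carrying at least one candidate indel (max_indel_pair) as well
+	// as the best pair carrying none (max_noindel_pair).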
+	for (size_t h1=0;h1<nh;h1++) if (filtered[h1]==0) for (size_t h2=h1;h2<nh;h2++) if (filtered[h2]==0) {
+		double ll=0.0;
+		for (size_t r=0;r<reads.size();r++){
+			ll+=log(0.5)+addLogs(rl[r*nh+h1],rl[r*nh+h2]);
+		}
+		// now we have the log likelihood, store posterior
+		pairs_posterior[h1*nh+h2] = ll + prior[h1*nh+h2];
+
+		// print the haplotype pair (debug output, disabled)
+		/*		
+		cout << "POST: " << h1 << " " << h2 << " " <<  pairs_posterior[h1*nh+h2] << " PR: " << prior[h1*nh+h2];
+		
+		cout << " IN: " << hap_num_indels[h1] << " SNP: " << hap_num_snps[h1]; cout << " H1:"; for (It it=haps[h1].indels.begin();it!=haps[h1].indels.end();it++) cout << it->first << "," << it->second.getString() << ";";
+		cout << " IN: " << hap_num_indels[h2] << " SNP: " << hap_num_snps[h2]; cout << " H2:"; for (It it=haps[h2].indels.begin();it!=haps[h2].indels.end();it++) cout << it->first << "," << it->second.getString() << ";";
+		cout << endl;
+		*/
+		
+		if (pairs_posterior[h1*nh+h2]>max_ll_indel && (hap_num_candidate_indels[h1]>0 || hap_num_candidate_indels[h2]>0)) {
+			max_ll_indel = pairs_posterior[h1*nh+h2];
+			max_indel_pair[0] = h1;
+			max_indel_pair[1] = h2;
+		}
+		if (pairs_posterior[h1*nh+h2]>max_ll_noindel && (hap_num_candidate_indels[h1]==0 && hap_num_candidate_indels[h2]==0)) {
+			max_ll_noindel = pairs_posterior[h1*nh+h2];
+			max_noindel_pair[0] = h1;
+			max_noindel_pair[1] = h2;
+		}
+
+	}
+
+	// output map-based variant calls
+	
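+	// variant quality: phred-scaled posterior of the best indel-free pair relative to the two
+	// competing MAP pairs, qual = -10*log10( P_noindel / (P_indel + P_noindel) ).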
+	double qual = 0.0;
+	double ll_ref = max_ll_noindel;
+	qual = - 10.0*( ll_ref - addLogs(max_ll_indel, ll_ref) )/log(10.0);
+	cout << "ll_ref: " << ll_ref << " max_ll_indel: " << max_ll_indel << " qual: " << qual << endl;
+	if (max_indel_pair[0]==-1 || max_indel_pair[1]==-1) throw string("Could not find indel allele");	
+	if (1) {
+		int numUnmappedRealigned = 0;
+		int hx1 = max_indel_pair[0];
+		int hx2 = max_indel_pair[1];
+		for (size_t r=0;r<reads.size();r++) {
+			if (reads[r].isUnmapped()) {
+				if (liks[hx1][r].offHap == false || liks[hx2][r].offHap == false) {
+					numUnmappedRealigned++;
+				}
+			}
+		}
+
+		map<int, std::set<AlignedVariant> > indels;
+		for (int i=0;i<2;i++) {
+			
+			const Haplotype & hap = haps[ max_indel_pair[i] ];
+
+			for (It it=hap.indels.begin();it!=hap.indels.end();it++) if (!it->second.isRef() || (it->second.isSNP() && it->second.getString()[3]=='D')){
+				indels[it->first].insert(it->second);
+			}
+		}
+
+		for (map<int, std::set<AlignedVariant> >::const_iterator it = indels.begin();it!=indels.end();it++) {
+			
+			double msq = 0;
+			int numf=0, numr=0, n=0;
+			int m =2;
+			if (max_indel_pair[0]==max_indel_pair[1]) m = 1;
+			for (int i=0;i<m;i++) {
+				int h=max_indel_pair[i];
+				It iter = haps[h].indels.find(it->first);
+				if (iter != haps[h].indels.end() && iter->second.isIndel()) {
+				
+					for (int r=0;r<nr;r++) {
+						bool covered=false, nft=false, nrt=false;
+
+					map<int, bool>::const_iterator it2=liks[h][r].hapIndelCovered.find(it->first);
+						if (it2!=liks[h][r].hapIndelCovered.end() && it2->second) covered=true;
+					
+						if (covered) { // hapHasVar[] is to check whether the haplotype truly has the variant or only the reference
+							if  (reads[r].onReverseStrand) nrt=true; else nft=true;
+							double mq=-10*log10(1.0-reads[r].mapQual);
+							msq+=mq*mq;
+							n++;
+						}
+						if (nft) numf++;
+						if (nrt) numr++;
+					}
+				}
+			}
+
+			if (n!=0) msq=sqrt(msq/double(n)); else msq=0.0;
+	
+			int was_candidate = 0;
+			
+			// determine genotype
+			const std::set< AlignedVariant> & alleles = it->second;
+			string genotype;
+			std::set<string> all_genotype;
+			string nref_all;
+
+			int vc_f = 0;
+			int vc_r = 0;
+			if (1) {
+				const AlignedVariant & avar = *alleles.begin();
+				const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+				if (av!=NULL) was_candidate=1;
+				vc_f += varCoverage[PAV(it->first, avar)].nf;
+				vc_r += varCoverage[PAV(it->first, avar)].nr;
+			}
+			
+			string a1="*REF", a2="*REF";
+			bool a1_ref=true, a2_ref=true;
+			It ita1 = haps[hx1].indels.find(it->first);
+			It ita2 = haps[hx2].indels.find(it->first);
+					
+			if (ita1 != haps[hx1].indels.end() && !ita1->second.isRef()) {
+				a1 = ita1->second.getString();
+				a1_ref=false;
+			}
+			if (ita2 != haps[hx2].indels.end() && !ita2->second.isRef()) {
+				a2 = ita2->second.getString();
+				a2_ref=false;
+			}
+			all_genotype.insert(a1);
+			all_genotype.insert(a2);
+
+			//cout << "a1: " << a1 << " a2: " << a2 << " a1ref " << a1_ref << " a2ref " << a2_ref << endl;
+
+			if (a1_ref && a2_ref) throw string("genotyping error");
+
+			if (a1==a2) {
+				genotype = string("1/1");
+				nref_all = a1;
+			} else {
+				
+				if (a1_ref) { 
+					genotype = string("0/1");
+					nref_all = a2;
+				} else if (a2_ref) {
+					genotype = string("0/1");
+					nref_all = a1;
+				} else  {
+					nref_all = a1+','+a2;
+					genotype = string("1/2");
+					
+					if (1) {
+						const AlignedVariant & avar = *alleles.rbegin();
+						const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+						if (av!=NULL) was_candidate=1;
+						vc_f += varCoverage[PAV(it->first, avar)].nf;
+						vc_r += varCoverage[PAV(it->first, avar)].nr;
+					}
+				}
+			}
+
+			// determine genotype quality
+			//cout << "POS: " << it->first << endl;
+			double max_ll_altgeno = -HUGE_VAL;
+			for (size_t h1=0;h1<nh;h1++) if (filtered[h1]==0) for (size_t h2=h1;h2<nh;h2++) if (filtered[h2]==0) {
+				if (!( (h1==hx1 && h2 == hx2) || (h2==hx1 && h1 == hx2))) {
+					std::set<string> _alt_genotype;
+					 It it2=haps[h1].indels.find(it->first);
+					 if (it2 == haps[h1].indels.end() || it2->second.isRef()) {
+						 _alt_genotype.insert(string("*REF"));
+					 } else {
+						 _alt_genotype.insert(it2->second.getString());
+					}
+					it2=haps[h2].indels.find(it->first);
+					 if (it2 == haps[h2].indels.end() || it2->second.isRef()) {
+						 _alt_genotype.insert(string("*REF"));
+					 } else {
+						 _alt_genotype.insert(it2->second.getString());
+					}
+			//		 cout << "CALLED: " << h1 << " " << h2 << " geno: " <<  *all_genotype.begin() << " " << *all_genotype.rbegin() << " ALT: " << *_alt_genotype.begin() << " " << *_alt_genotype.rbegin() <<  " " << pairs_posterior[h1*nh+h2] << " " << max_ll_altgeno << endl;
+
+				 	if (_alt_genotype != all_genotype && max_ll_altgeno<pairs_posterior[h1*nh+h2]) {
+						 max_ll_altgeno = pairs_posterior[h1*nh+h2];
+							
+					}
+
+				}
+			}
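+			// genotype quality: phred-scaled posterior of the best haplotype pair that implies a
+			// different genotype at this site, relative to the called (MAP) pair.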
+			double genoqual = 0.0;
+			genoqual = - 10.0*( max_ll_altgeno - addLogs(max_ll_indel, max_ll_altgeno) )/log(10.0);
+
+
+
+
+	
+
+			ostringstream glfs; glfs << genotype << ":" << genoqual;
+
+
+	 	
+			OutputData::Line line(glfData);
+			line.set("msg", "ok");
+			line.set("index", index);
+			line.set("tid", params.tid);
+			line.set("analysis_type",string("dip.map"));
+			line.set("indidx",0);
+			line.set("lpos",leftPos);
+			line.set("rpos",rightPos);
+			line.set("center_position",candPos);
+			line.set("realigned_position",it->first+leftPos);
+			line.set("was_candidate_in_window",was_candidate);
+			line.set("qual", qual );
+			//line.set("post_prob_variant", exp(logp));
+			//line.set("est_freq", freq);
+			line.set("nref_all", nref_all);
+			line.set("num_reads", readidx.size());
+			line.set("msq",msq);
+			//line.set("numOffAll",numOffBoth);
+			//line.set("num_indel",numMappedIndels);
+			line.set("num_cover_forward", numf);
+			line.set("num_cover_reverse", numr);
+			line.set("var_coverage_forward", vc_f);
+			line.set("var_coverage_reverse", vc_r);
+			line.set("num_unmapped_realigned", numUnmappedRealigned);
+			line.set("glf", glfs.str());
+			//line.set("likrr", lik[0]);
+			//line.set("likrn", lik[1]);
+			//line.set("liknn", lik[2]);
+			glfData.output(line);
+		}
+	}
+	
+
+	for (map<int, std::set<PAV> >::const_iterator it= allVariantsByPos.begin();it!=allVariantsByPos.end();it++)
+	{
+		bool has_variants_in_window = 0;
+		BOOST_FOREACH(PAV pav, it->second) {
+			const AlignedVariant & avar = pav.second;
+			const AlignedVariant *av = candidateVariants.findVariant(avar.getStartHap()+leftPos, avar.getType(), avar.getString());
+			//cout << "VAR "  << avar.getString() << endl;
+			if (av!=NULL) {
+				has_variants_in_window=1;
+				break;
+			}
+		}
+
+
+
+		std::set<int> support;
+		int totnf=0, totnr=0;
+		double log5=log(0.5);
+		int nf=0;
+		int nr=0;
+
+		// compute RMS of mapping qualities
+
+		int pos=it->first;
+		int posIdx = posToPosIdx[pos];
+		double msq=0.0;
+		int n=0;
+
+		//cout << endl;
+		//cout << "pos: " << pos << " posIdx " << posIdx << endl;
+
+		typedef std::set<int> IntGenotype;
+		map<IntGenotype, double> genLiks;
+
+		typedef map<IntGenotype,double>::iterator IGIt;
+
+		double maxll=-HUGE_VAL;
+		int hx1, hx2;
+
+		for (size_t h1=0;h1<nh;h1++) if (filtered[h1]==0) for (size_t h2=h1;h2<nh;h2++) if (filtered[h2]==0) {
+			IntGenotype genotype;
+			int v1=hapVar[h1*numVarPos+posIdx];
+			int v2=hapVar[h2*numVarPos+posIdx];
+
+			genotype.insert(hapVar[h1*numVarPos+posIdx]);
+			genotype.insert(hapVar[h2*numVarPos+posIdx]);
+
+			double logPriorPos=0.0;
+			//cerr << "FIX THIS!\n" << endl;
+//			if ( (v1>0 && varType[v1]==VARSNP) || (v2>0 && varType[v2]==VARSNP)) { logPriorPos += log(params.priorSNP);  } else if ( (v1>0 && varType[v1]==VARINDEL) || (v2>0 && varType[v2]==VARINDEL) ) { logPriorPos+=log(params.priorIndel); };
+
+			AlignedVariant av1, av2;
+			if (v1) av1=variants[v1].second; else av1=AlignedVariant("*REF",-1);
+			if (v2) av2=variants[v2].second; else av2=AlignedVariant("*REF",-1);
+
+			logPriorPos = getPairPrior(av1,av2,leftPos, candidateVariants);
+			double pr=prior[h1*nh+h2]-logPriorPos; // subtract the prior for this site to obtain the likelihood
+
+			//cout << "prior: " << h1 << " " << h2 << ": " << prior[h1*nh+h2] << " logPriorPos " << logPriorPos << endl;
+			double ll=pr;
+			for (size_t r=0;r<reads.size();r++){
+				ll+=log5+addLogs(rl[r*nh+h1],rl[r*nh+h2]);
+			}
+
+			//cout << "genotype: " << *(genotype.begin()) << " " << *(genotype.rbegin()) << " lik: " << ll <<endl;
+
+
+			IGIt igit = genLiks.find(genotype);
+			if (igit==genLiks.end()) {
+				genLiks[genotype]=ll;
+			} else {
+				genLiks[genotype]=addLogs(genLiks[genotype],ll);
+			}
+
+			if (ll>maxll) {
+				maxll=ll;
+				hx1=h1;
+				hx2=h2;
+			}
+
+		}
+		//cout << "hx1: " << hx1 << " hx2: " << hx2 << " postprob: " << maxll << endl;
+
+		// see how many unmapped reads were realigned to the MAP haplotypes
+
+		int numUnmappedRealigned = 0;
+		for (size_t r=0;r<reads.size();r++) {
+			if (reads[r].isUnmapped()) {
+				if (liks[hx1][r].offHap == false || liks[hx2][r].offHap == false) {
+					numUnmappedRealigned++;
+				}
+			}
+		}
+
+		if (params.outputPooledLikelihoods) {
+			string ofLiksFile =  params.fileName;
+			ofLiksFile.append(".check.txt");
+			ofstream ofLiks(ofLiksFile.c_str());
+			if (!ofLiks.is_open()) {
+				throw string("Cannot open file for writing");
+			}
+
+			// output haplotypes
+			//
+			ofLiks << "HAPLOTYPES" << endl;
+			for (size_t h=0;h<haps.size();h++) {
+				ofLiks << h;
+				stringstream varss;
+
+				for(map<int, AlignedVariant>::const_iterator it = haps[h].indels.begin(); it != haps[h].indels.end(); it++) {
+					varss << leftPos+it->first << "," << it->second.getString() << ";";
+				}
+				ofLiks << "\t" << varss.str() << endl;
+			}
+
+			ofLiks << "READS" << endl;
+
+
+			for (size_t r=0;r<reads.size();r++) {
+				int offBoth = 0;
+				if (liks[hx1][r].offHap == true && liks[hx2][r].offHap == true) {
+					offBoth =1;
+				}
+				ofLiks << r << "\t" << bam1_qname(reads[r].getBam()) << "\t" << reads[r].pos << "\t" << reads[r].mapQual;
+				for (size_t h=0;h<haps.size();h++) {
+					ofLiks << "\t" << liks[h][r].ll;
+				}
+				for (size_t h=0;h<haps.size();h++) {
+					ofLiks << "\t" << int(liks[h][r].offHap);
+				}
+				ofLiks << endl;
+
+			}
+			ofLiks.close();
+		}
+#define DEBUGDIPLOIDGLF
+#ifdef DEBUGDIPLOIDGLF
+		if (params.outputPooledLikelihoods) {
+			for (size_t r=0;r<reads.size();r++) {
+				int mhx1 = 0;
+				int mhx2 = 1;
+				
+				cout  << endl << "**READ** " << r << " " << bam1_qname(reads[r].getBam()) << " mapQual: " << reads[r].mapQual << " liks: " << liks[mhx1][r].ll << " " << liks[mhx2][r].ll << " unMapped: " << reads[r].isUnmapped() << endl;
+
+				if (1) {
+					/*
+					cout << string(50,' '); cout << reads[r].seq.seq << endl;
+					cout << haps[hx1].seq << endl;
+					cout << haps[hx2].seq << endl;
+					*/
+					Read newread(reads[r]);
+
+					/*
+					newread.mapQual = 1.0 - 1e-20;
+					newread.complement();
+					newread.reverse();
+					*/
+
+					cout << "first: " << endl;
+					ObservationModelFBMaxErr obs(haps[mhx1], newread, leftPos, params.obsParams);
+					cout << string(50,' ') << haps[mhx1].seq << endl;
+					obs.printAlignment(50);
+
+
+					cout << "second: " << endl;
+					ObservationModelFBMaxErr obs2(haps[mhx2], newread, leftPos, params.obsParams);
+					cout << string(50,' ') << haps[mhx2].seq << endl;
+					obs2.printAlignment(50);
+
+
+				} else {
+					int hidx = hx1; if (liks[hx2][r].ll>liks[hx1][r].ll) hidx = hx2;
+					cout << "hidx: " << hidx << " hx1: " << hx1 << " hx2: " << hx2 << endl;
+					ObservationModelFBMaxErr obs(haps[hidx], reads[r], leftPos, params.obsParams);
+					cout << string(50,' ') << haps[hidx].seq << endl;
+					obs.printAlignment(50);
+				}
+			}
+		}
+
+#endif
+
+
+
+
+		double allmsq=0.0;
+		int numMappedIndels=0;
+
+		int nBQT=0, nmmBQT=0;
+		double mLogBQ=0.0;
+		int nMMLeft=0;
+		int nMMRight=0;
+		int numOffBoth =0;
+
+		BOOST_FOREACH(int r, readidx) {
+			double mq=-10*log10(1.0-reads[r].mapQual);
+			allmsq+=(mq*mq);
+			//cout << " " << 10*log10(1-reads[r].mapQual);
+
+			int mlidx; double ml=-HUGE_VAL;
+			std::set<int> mlis;
+
+			if (liks[hx1][r].offHap && liks[hx2][r].offHap) numOffBoth++;
+
+			if (liks[hx1][r].ll>=liks[hx2][r].ll) {
+				mlidx=hx1;
+				ml=liks[hx1][r].ll;
+			} else {
+				mlidx=hx2;
+				ml=liks[hx2][r].ll;
+			}
+
+			mlis.insert(mlidx);
+
+			bool nrt=false, nft=false;
+
+			map<int,bool>::const_iterator it;
+			BOOST_FOREACH(int h, mlis) {
+				bool covered=false;
+				numMappedIndels += int(liks[h][r].indels.size());
+				nBQT+=liks[h][r].nBQT;
+				nmmBQT+=liks[h][r].nmmBQT;
+				mLogBQ+=liks[h][r].mLogBQ;
+				if (liks[h][r].nMMLeft>=2) nMMLeft++;
+				if (liks[h][r].nMMRight>=2) nMMRight++;
+
+
+				map<int, AlignedVariant>::const_iterator hit=haps[h].indels.find(pos);
+				if (hit!=haps[h].indels.end() && hit->second.isIndel()) {
+					it=liks[h][r].hapIndelCovered.find(pos);
+					if (it!=liks[h][r].hapIndelCovered.end() && it->second) covered=true;
+				} else if (hit!=haps[h].indels.end() && hit->second.isSNP()) {
+					it=liks[h][r].hapSNPCovered.find(pos);
+					if (it!=liks[h][r].hapSNPCovered.end() && it->second) covered=true;
+				}
+				if (covered) { // hapHasVar[] is to check whether the haplotype truly has the variant or only the reference
+					if  (reads[r].onReverseStrand) nrt=true; else nft=true;
+					double mq=-10*log10(1.0-reads[r].mapQual);
+					msq+=mq*mq;
+					n++;
+				}
+			}
+			if (nft) nf++;
+			if (nrt) nr++;
+		} // foreach read r
+	//	cout << endl;
+
+		if (n!=0) msq=sqrt(msq/double(n)); else msq=0.0;
+		totnf+=nf;
+		totnr+=nr;
+
+		allmsq=(readidx.size()!=0)?sqrt(allmsq/double(readidx.size())):0;
+
+
+		// recode variant indexes to glf/vcf indexes
+		int nidx=1;
+		map <int, int> toVCFidx;
+		vector<string> alleles;
+		alleles.push_back("R");
+		toVCFidx[0]=0;
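+		// Allele recoding read off the loop below: index 0 is the reference allele ("R");
+		// each distinct non-reference allele seen on an unfiltered haplotype gets the next
+		// free index, and comma-separated allele / forward-coverage / reverse-coverage
+		// strings are built in the same order for the GLF output line.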
+
+		ostringstream oAlleles;
+		ostringstream oCovForward;
+		ostringstream oCovReverse;
+		int first=1;
+		for (size_t h=0;h<nh;h++) {
+			int v = hapVar[h*numVarPos+posIdx];
+			if (v!=0) {
+				map<int,int>::iterator tit = toVCFidx.find(v);
+				if (tit==toVCFidx.end()) {
+					toVCFidx[v]=nidx++;
+					alleles.push_back(variants[v].second.getString());
+					string str=(first==1) ? string(""):string(",");
+
+					oAlleles << str << variants[v].second.getString();
+					oCovForward << str << varCoverage[variants[v]].nf;
+					oCovReverse << str << varCoverage[variants[v]].nr;
+					first = 0;
+				}
+			}
+		}
+
+
+		// compute genotype posterior
+
+		ostringstream o;
+
+		first=1;
+		for (map<IntGenotype, double>::iterator git=genLiks.begin();git!=genLiks.end();git++) {
+			int v1 = *(git->first.begin());
+			int v2 = *(git->first.rbegin());
+			int a1 = toVCFidx[ *(git->first.begin()) ];
+			int a2 = toVCFidx[ *(git->first.rbegin()) ];
+
+			string str=(first==1) ? string(""):string(",");
+			o << str << a1 << "/" << a2 << ":" << git->second;
+
+			double logPrior=0.0;
+			if ( (v1>0 && varType[v1]==VARSNP) || (v2>0 && varType[v2]==VARSNP)) { logPrior += log(params.priorSNP);  } else if ( (v1>0 && varType[v1]==VARINDEL) || (v2>0 && varType[v2]==VARINDEL) ) { logPrior+=log(params.priorIndel); };
+
+			git->second -= logPrior;
+			first = 0;
+		}
+
+
+
+
+
+		if (params.outputGLF) {
+			OutputData::Line line(glfData);
+			line.set("msg", "ok");
+			line.set("index", index);
+			line.set("tid", params.tid);
+			line.set("analysis_type",program);
+			line.set("indidx",0);
+			line.set("lpos",leftPos);
+			line.set("rpos",rightPos);
+			line.set("center_position",candPos);
+			line.set("realigned_position",pos+leftPos);
+			line.set("was_candidate_in_window",has_variants_in_window);
+			line.set("logZ", maxll );
+			//line.set("post_prob_variant", exp(logp));
+			//line.set("est_freq", freq);
+			line.set("nBQT", nBQT);
+			line.set("nmmBQT", nmmBQT);
+			line.set("mLogBQ", mLogBQ/double(nBQT));
+			line.set("nMMLeft", nMMLeft);
+			line.set("nMMRight", nMMRight);
+			line.set("nref_all", oAlleles.str());
+			line.set("num_reads", readidx.size());
+			line.set("msq",allmsq);
+			line.set("numOffAll",numOffBoth);
+			line.set("num_indel",numMappedIndels);
+			line.set("num_cover_forward", nf);
+			line.set("num_cover_reverse", nr);
+			
+			line.set("var_coverage_forward", oCovForward.str());
+			line.set("var_coverage_reverse", oCovReverse.str());
+
+
+			line.set("glf", o.str());
+			line.set("num_unmapped_realigned", numUnmappedRealigned);
+			//line.set("likrr", lik[0]);
+			//line.set("likrn", lik[1]);
+			//line.set("liknn", lik[2]);
+			glfData.output(line);
+		}
+	// glfOutput <<  PRID << " "  << b << " " << params.tid << " " << candPos << " " << pav.first+leftPos << " " << pav.second.getString() << " " << reads.size() <<  " " << msq << " "  << nf << " " << nr << " " << lik[0] << " " << lik[1] << " " << lik[2] << endl;
+
+
+  }
+
+}
+#endif
+
+void DetInDel::estimateHaplotypeFrequencies(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<double> & hapFreqs)
+{
+
+	// estimate haplotype frequencies using EM
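+	// Sketch of the updates implemented below (notation follows the local variables):
+	//   E-step:  z[h,r] = pi[h]*P(r|h) / sum_h' pi[h']*P(r|h')   (done in log space via addLogs)
+	//   M-step:  pi[h]  = (1/nr) * sum_r z[h,r]
+	// Iteration stops when the expected complete-data log-likelihood eNew changes by less
+	// than params.EMtol, or once iter exceeds 25; a decrease in the objective is treated as an error.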
+	hapFreqs.clear();
+
+	size_t nh=haps.size();
+	size_t nr=reads.size();
+
+
+
+
+	vector<double> rl(nh*nr,0.0); // read given haplotype likelihoods
+
+	vector<double> z(nh*nr,0.0); // expectations of read-haplotype indicator variables
+	vector<double> pi(nh); // log of haplotype frequencies
+	vector<double> nk(nh,0.0); // counts for each haplotype
+
+	hapFreqs=nk;
+
+	// initialize frequencies
+	for (size_t h=0;h<nh;h++) pi[h]=1.0/double(nh);
+
+	for (size_t h=0;h<nh;h++) for (size_t r=0;r<nr;r++) {
+		// initialize read-haplotype likelihoods
+		rl[h*nr+r]=liks[h][r].ll;
+
+		// initialize expectations of indicator variables
+		z[h*nr+r]=0.5;
+	}
+
+
+	bool converged=false;
+	double tol=params.EMtol;
+
+	double eOld=-HUGE_VAL, eNew;
+
+	cout << "EM HapFreqs:";
+
+	int iter=0;
+	while (!converged) {
+
+		// compute expectation of indicator variables
+		for (size_t h=0;h<nh;h++) nk[h]=0.0;
+
+		int idx=0;
+		for (size_t r=0;r<nr;r++) {
+			double lognorm=-HUGE_VAL;
+			// compute responsibilities
+			for (size_t h=0;h<nh;h++) {
+				z[h*nr+r]=pi[h]+(rl[h*nr+r]);
+				lognorm=addLogs(lognorm, z[h*nr+r]);
+			}
+			// normalize and compute counts
+			for (size_t h=0;h<nh;h++) {
+				z[nr*h+r]-=lognorm;
+				z[nr*h+r]=exp(z[nr*h+r]);
+
+				nk[h]+=z[nr*h+r];
+			}
+		}
+
+		// compute frequencies
+
+		for (size_t h=0;h<nh;h++) {
+			double nph=nk[h]/nr;
+			pi[h]=log(nph);
+		}
+
+
+		idx=0;
+		eNew=0.0;
+		for (size_t h=0;h<nh;h++) {
+
+		for (size_t r=0;r<nr;r++) {
+			// compute responsibilities
+				eNew+=z[idx]*( pi[h]+rl[idx]);
+				idx++;
+			}
+		}
+		//cout << "[" << eNew << "]" << endl;
+		//
+		if (eOld>eNew) throw string("EM Error in estimateHapFreqs");
+		converged=(fabs(eOld-eNew))<tol || iter>25;
+
+		eOld=eNew;
+
+
+		iter++;
+	}
+
+	for (size_t h=0;h<nh;h++) { cout << " " << exp(pi[h]); }
+	cout << endl;
+
+	// output haplotypes and estimated frequencies
+
+	for (size_t h=0;h<nh;h++) hapFreqs[h]=exp(pi[h]);
+}
+
+
+void DetInDel::computePairLikelihoods(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<HapPairLik> & likPairs, bool usePrior, const AlignedCandidates & candidateVariants, int leftPos)
+{
+//	cout << "Computing pair likelihoods...\n";
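+	// For every pair (hp,hm) of haplotypes the loop below accumulates
+	//   ll(hp,hm) = sum_r log( 0.5*(P(r|hp)+P(r|hm)) )   [+ a haplotype-pair prior if usePrior]
+	// together with bookkeeping on which haplotype each read maps to best and how often the
+	// candidate variants are covered; the pairs are finally sorted by decreasing log-likelihood.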
+	likPairs.clear();
+	size_t lh=haps.size();
+	likPairs.reserve(lh*(lh/2));
+	//const double log10=log(10);
+	double maxLL=-HUGE_VAL; int hpm, hmm;
+	size_t midx;
+	for (size_t hp=0;hp<lh;hp++) for (size_t hm=hp;hm<lh;hm++) {
+		double ll=0.0;
+		HapPairLik hpl;
+		hpl.numIndFirst=0;
+		hpl.numIndSecond=0;
+		hpl.numOffBoth=0;
+		hpl.numOffBothError=0.0;
+		hpl.numFirst=0;
+		hpl.numSecond=0;
+		hpl.h1=hp;
+		hpl.h2=hm;
+		for (size_t r=0;r<reads.size();r++) {
+			ll+=(addLogs(liks[hp][r].ll,liks[hm][r].ll)+log(.5));
+			const MLAlignment & ml1=liks[hp][r];
+			const MLAlignment & ml2=liks[hm][r];
+
+			// record which indel was on which haplotype
+			if (!ml1.offHap && ml1.ll>ml2.ll && ml1.indels.size()!=0) hpl.numIndFirst++;
+			else if (!ml2.offHap && ml2.ll>ml1.ll && ml2.indels.size()!=0) hpl.numIndSecond++;
+			if (ml1.offHapHMQ && ml2.offHapHMQ) { hpl.numOffBoth++; hpl.numOffBothError+=reads[r].mapQual; };
+			if (ml1.ll>=ml2.ll) hpl.numFirst++;
+			if (ml2.ll>=ml1.ll) hpl.numSecond++;
+
+	//		determine read coverage of all the variants
+	//		TODO this part is really slow!
+			for (map<int, bool>::const_iterator it=ml1.hapIndelCovered.begin();it!=ml1.hapIndelCovered.end();it++) if (it->second) if (ml1.ll>=ml2.ll) { if (reads[r].onReverseStrand) hpl.hapIndelCoverage1[it->first].nr++; else hpl.hapIndelCoverage1[it->first].nf++; }
+			for (map<int, bool>::const_iterator it=ml2.hapIndelCovered.begin();it!=ml2.hapIndelCovered.end();it++) if (it->second) if (ml2.ll>=ml1.ll) { if (reads[r].onReverseStrand) hpl.hapIndelCoverage2[it->first].nr++; else hpl.hapIndelCoverage2[it->first].nf++; }
+			for (map<int, bool>::const_iterator it=ml1.hapSNPCovered.begin();it!=ml1.hapSNPCovered.end();it++) if (it->second) if (ml1.ll>=ml2.ll) { if (reads[r].onReverseStrand) hpl.hapSNPCoverage1[it->first].nr++; else hpl.hapSNPCoverage1[it->first].nf++; }
+			for (map<int, bool>::const_iterator it=ml2.hapSNPCovered.begin();it!=ml2.hapSNPCovered.end();it++) if (it->second) if (ml2.ll>=ml1.ll) { if (reads[r].onReverseStrand) hpl.hapSNPCoverage2[it->first].nr++; else hpl.hapSNPCoverage2[it->first].nf++; }
+
+
+		}
+		if (usePrior) ll+=getHaplotypePrior(haps[hp], haps[hm], leftPos, candidateVariants);
+
+		hpl.ll=ll;
+		if (ll>maxLL) {
+			maxLL=ll;
+			hpm=hp;
+			hmm=hm;
+			midx=likPairs.size();
+		}
+	//	cout << "hp: " << hp << " hm: " << hm << " ll: " << ll << endl;
+		likPairs.push_back(hpl);
+	}
+
+
+//	cout << "ML hap: " << hpm << " " << hmm << " midx: " << midx << endl;
+	/*
+	cout << haps[hpm] << endl;
+	cout << "indels: "; for (map<int, AlignedVariant>::const_iterator it=haps[hpm].indels.begin();it!=haps[hpm].indels.end();it++) {
+		cout << "[" << it->first << "," << it->second.getString() << "]";
+	}
+	cout << endl;
+	cout << "coverage: ";
+	for (map<int, VariantCoverage>::const_iterator it=likPairs[midx].hapIndelCoverage1.begin();it!=likPairs[midx].hapIndelCoverage1.end();it++) {
+		cout << "[" << it->first << "," << it->second.nf << "," << it->second.nr << "]";
+	}
+	cout << endl;
+	cout << haps[hmm] << endl;
+	cout << "indels: "; for (map<int, AlignedVariant>::const_iterator it=haps[hmm].indels.begin();it!=haps[hmm].indels.end();it++) {
+		cout << "[" << it->first << "," << it->second.getString() << "]";
+	}
+	cout << endl;
+	for (map<int, VariantCoverage>::const_iterator it=likPairs[midx].hapIndelCoverage2.begin();it!=likPairs[midx].hapIndelCoverage2.end();it++) {
+		cout << "[" << it->first << "," << it->second.nf << "," << it->second.nr << "]";
+	}
+
+	cout << endl;
+	*/
+
+	class SortFunc {
+	public:
+		static bool sortFunc(const HapPairLik & hpl1, const HapPairLik & hpl2)
+		{
+			// sort in decreasing order
+			return hpl1.ll>hpl2.ll;
+		}
+	};
+	sort(likPairs.begin(), likPairs.end(),SortFunc::sortFunc);
+}
+
+void DetInDel::statisticsHaplotypePair(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, HapPairLik & hpl, OutputData::Line & line)
+{
+	hpl.numIndFirst=0;
+	hpl.numIndSecond=0;
+	hpl.numOffBoth=0;
+	hpl.numOffBothError=0.0;
+	hpl.numFirst=0;
+	hpl.numSecond=0;
+	int hp=hpl.h1;
+	int hm=hpl.h2;
+	for (size_t r=0;r<reads.size();r++) {
+		const MLAlignment & ml1=liks[hp][r];
+		const MLAlignment & ml2=liks[hm][r];
+
+		if (!ml1.offHap && ml1.ll>ml2.ll && ml1.indels.size()!=0) hpl.numIndFirst++;
+		else if (!ml2.offHap && ml2.ll>ml1.ll && ml2.indels.size()!=0) hpl.numIndSecond++;
+		if (ml1.offHapHMQ && ml2.offHapHMQ) { hpl.numOffBoth++; hpl.numOffBothError+=reads[r].mapQual; };
+		if (ml1.ll>=ml2.ll) hpl.numFirst++;
+		if (ml2.ll>=ml1.ll) hpl.numSecond++;
+
+//		determine read coverage of all the variants
+//		TODO this part is really slow!
+		for (map<int, bool>::const_iterator it=ml1.hapIndelCovered.begin();it!=ml1.hapIndelCovered.end();it++) if (it->second) if (ml1.ll>=ml2.ll) { if (reads[r].onReverseStrand) hpl.hapIndelCoverage1[it->first].nr++; else hpl.hapIndelCoverage1[it->first].nf++; }
+		for (map<int, bool>::const_iterator it=ml2.hapIndelCovered.begin();it!=ml2.hapIndelCovered.end();it++) if (it->second) if (ml2.ll>=ml1.ll) { if (reads[r].onReverseStrand) hpl.hapIndelCoverage2[it->first].nr++; else hpl.hapIndelCoverage2[it->first].nf++; }
+		for (map<int, bool>::const_iterator it=ml1.hapSNPCovered.begin();it!=ml1.hapSNPCovered.end();it++) if (it->second) if (ml1.ll>=ml2.ll) { if (reads[r].onReverseStrand) hpl.hapSNPCoverage1[it->first].nr++; else hpl.hapSNPCoverage1[it->first].nf++; }
+		for (map<int, bool>::const_iterator it=ml2.hapSNPCovered.begin();it!=ml2.hapSNPCovered.end();it++) if (it->second) if (ml2.ll>=ml1.ll) { if (reads[r].onReverseStrand) hpl.hapSNPCoverage2[it->first].nr++; else hpl.hapSNPCoverage2[it->first].nf++; }
+
+		// record which indel was on which haplotype
+	}
+
+	line.set("num_off_hap", hpl.numOffBoth);
+	line.set("num_mapped_to_first",hpl.numFirst);
+	line.set("num_mapped_to_second",hpl.numSecond);
+}
+
+
+
+void parseRegionString(const string & region, int & start, int & end)
+{
+	string filtered;
+	for(size_t x=0;x<region.size();x++) {
+		char c=region[x];
+		if (c=='-') filtered+=' ';
+		else if (c!=',') filtered+=c;
+	}
+	istringstream is(filtered);
+	string e; is >> e;
+	if (!from_string(start,e,std::dec)) throw string("Cannot parse region start!");
+	is >> e;
+	if (!from_string(end,e,std::dec)) throw string("Cannot parse region end!");
+}
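+// Illustrative use (hypothetical values): commas are stripped and '-' acts as the separator, so
+//   int start, end;
+//   parseRegionString("1,000-2,000", start, end);   // -> start == 1000, end == 2000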
+
+void getParameters(po::variables_map & vm, DetInDel::Parameters & params)
+{
+	params.maxHap=vm["maxHap"].as<uint32_t> ();
+	params.maxReads=vm["maxRead"].as<uint32_t> ();
+	params.width=vm["width"].as<uint32_t> ();
+	params.mapQualThreshold=vm["mapQualThreshold"].as<double>();
+	params.skipMaxHap=vm["skipMaxHap"].as<uint32_t>();
+	//params.glfNumHap=vm["glfNumHap"].as<uint32_t>();
+	params.inferenceMethod=vm["inferenceMethod"].as<string>();
+	params.minReadOverlap=vm["minReadOverlap"].as<uint32_t>();
+	params.maxReadLength=vm["maxReadLength"].as<uint32_t>();
+	//params.scaleErr=vm["mapScaleError"].as<double>();
+	//params.minCount=vm["minCount"].as<uint32_t>();
+	params.maxHapReadProd=vm["maxHapReadProd"].as<uint32_t>();
+
+	params.priorSNP=vm["priorSNP"].as<double>();
+	params.priorIndel=vm["priorIndel"].as<double>();
+	params.bayesa0=vm["bayesa0"].as<double>();
+	params.bayesType=vm["bayesType"].as<string>();
+
+	
+
+
+	if (vm.count("ref")) {
+		params.alignAgainstReference=true;
+		params.refFileName=vm["ref"].as<string>();
+	} else {
+		params.alignAgainstReference=false;
+	}
+
+	params.obsParams.pError=vm["pError"].as<double>();
+	params.obsParams.pMut=vm["pMut"].as<double>();
+
+	//params.obsParams.baseQualThreshold=vm["baseQualThreshold"].as<double>();
+	//	params.obsParams.fixedBaseQual=vm["fixedBaseQual"].as<double>();
+	params.obsParams.maxLengthIndel=vm["maxLengthIndel"].as<int>();
+	params.obsParams.maxLengthDel=params.obsParams.maxLengthIndel;
+	params.obsParams.mapQualThreshold=vm["capMapQualThreshold"].as<double>();
+	params.obsParams.capMapQualFast=vm["capMapQualFast"].as<double>();
+	//params.obsParams.scaleErr=vm["obsScaleError"].as<double>();
+	//params.obsParams.numE=vm["numE"].as<int>();
+	params.obsParams.padCover = vm["flankRefSeq"].as<int>();
+	params.obsParams.maxMismatch = vm["flankMaxMismatch"].as<int>();
+	params.checkAllCIGARs=vm["checkAllCIGARs"].as<int>();
+
+	params.varFileIsOneBased=vm.count("varFileIsOneBased")?true:false;
+	params.outputRealignedBAM=vm.count("outputRealignedBAM")?true:false;
+	params.analyzeLowFreq=vm.count("compareReadHap")?true:false;
+	params.analyzeLowFreqDiffThreshold=vm["compareReadHapThreshold"].as<double>();
+	params.showHapDist=vm.count("showEmpirical")?true:false;
+	params.showCandHap=vm.count("showCandHap")?true:false;
+	params.showReads=vm.count("showReads")?true:false;
+	params.quiet=vm.count("quiet")?true:false;
+	params.computeML=vm.count("computeML")?true:false;
+	params.computeMAP=vm.count("computeMAP")?true:false;
+	params.doDiploid=vm.count("doDiploid")?true:false;
+
+	params.filterHaplotypes=vm.count("filterHaplotypes")?true:false;
+
+	params.printCallsOnly=vm.count("printCallsOnly")?true:false;
+	params.estimateHapFreqs=vm.count("doPooled")?true:false;
+	params.outputPooledLikelihoods=vm.count("opl")?true:false;
+	params.showHapAlignments=vm.count("showHapAlignments")?true:false;
+	if (vm.count("filterReadAux")) params.filterReadAux=vm["filterReadAux"].as<string>();
+	if (vm.count("processRealignedBAM")) params.processRealignedBAM=vm["processRealignedBAM"].as<string>();
+
+	params.slower=vm.count("faster")?false:true;
+	params.changeINStoN=vm.count("changeINStoN")?true:false;
+	params.outputGLF=true;
+
+
+	// removed options
+/*
+	params.outputRealignedBAM=vm.count("outputRealignedBAM")?true:false;
+	params.obsParams.modelType=vm["modelType"].as<string>();
+	params.mapUnmappedReads=vm.count("mapUnmapped")?true:false;
+	params.obsParams.mapUnmappedReads=vm.count("mapUnmapped")?true:false;
+	params.obsParams.pFirstgLO=vm["pFirstgLO"].as<double>();
+	//params.numOutputTopHap=vm["numOutputTopHap"].as<int>();
+
+*/
+
+}
+
+
+
+int smain(int argc, char *argv[])
+{
+	if (1) {
+		Haplotype hap;
+		Read read;
+
+		//hap.seq=          "ATCGTGTAGCTCTCTGGCTGGCTAGCTGATTGGCTCTTGCC";
+		//read.seq.seq=              "CTCTCTGGCTGGCTAGCGAT";
+		Haplotype ref;
+		//                 012345678901234567890123456789
+		hap.seq=          "ATCGATTCGTGATATATATATTCAATGTAGTCGCTAG";
+		read.seq.seq=     "ATCGATTCGTGATAATATTCAATGTAGTCGCTAG";
+
+
+		//hap.seq=          "ATCGATTCGTGATATATATATTCAATGTAGTCGCTAG";
+		//read.seq.seq=     "ATCGATTCGTGATATATATATAATTCAATGTAGTCGCTAG";
+
+		//                 012345678901234567890123456789
+		//hap.seq=          "ATCGATTCGTGTTTTTTCAATGTAGTCGCTAG";
+		//read.seq.seq=     "ATCGATTCGTGTTTTTCAATGTAGTCGCTAG";
+
+		read.mapQual=1-1e-16;
+
+		ObservationModelParameters obsParams;
+		read.setAllQual(0.99);
+
+		ObservationModelFBMaxErr omfbe(hap, read, 0, obsParams);
+		/*
+		ObservationModelS oms(hap, read, 0, obsParams);
+		HapHash hash(4, hap);
+
+		oms.align(hash);
+		*/
+
+		double ll= omfbe.calcLikelihood();
+
+		cout << "ll: " << ll << endl;
+		cout << string(50,' ') << hap.seq << endl;
+		omfbe.printAlignment(50);
+	}
+	if (0) {
+		Haplotype hap;
+		Read read;
+
+		//hap.seq=          "ATCGTGTAGCTCTCTGGCTGGCTAGCTGATTGGCTCTTGCC";
+		//read.seq.seq=              "CTCTCTGGCTGGCTAGCGAT";
+
+		hap.seq=          "AAAATCACCAACACTTCATAATCTATTTTTTCCCCTGAGGAACTTCCTAAAATGAATAAAAAAAAACCCCAGCCACATCTGCATTTGCAAACAGGAAACTCTGCAAGCCATACTAAGACCAAAGCTTAGTT";
+		read.seq.seq=     "CAAACAGGAAACTCTGCAAGCCATACTAAGACCAAAGCTTAGTTA";
+
+
+		read.mapQual=1-1e-16;
+
+		ObservationModelParameters obsParams;
+		read.setAllQual(0.99);
+
+		ObservationModelFBMaxErr omfbe(hap, read, 0, obsParams);
+		/*
+		ObservationModelS oms(hap, read, 0, obsParams);
+		HapHash hash(4, hap);
+
+		oms.align(hash);
+		*/
+
+		double ll= omfbe.calcLikelihood();
+
+		cout << "ll: " << ll << endl;
+		cout << string(50,' ') << hap.seq << endl;
+		omfbe.printAlignment(50);
+	}
+
+	return 0;
+
+}
+
+
+
+
+
+
+#ifdef DINDEL
+int main(int argc, char *argv[])
+{
+	po::options_description which("[Required] Program option");
+	which.add_options()
+	("analysis", po::value<string>()->default_value("indels"),"Analysis type:\n"
+													          "getCIGARindels:  Extract indels from CIGARs of mapped reads, and infer library insert size distributions\n"
+															  "indels: infer indels\n"
+															  "realignCandidates: Realign/reposition candidates in candidate file\n");
+
+	po::options_description required("[Required] ");
+	required.add_options()
+	("ref", po::value<string>(),"fasta reference sequence (should be indexed with .fai file)")
+	("outputFile", po::value<string>(),"file-prefix for output results");
+
+	po::options_description baminput("[Required] BAM input. Choose one of the following");
+	baminput.add_options()
+	("bamFile",po::value<string>(), "read alignment file (should be indexed)")
+	("bamFiles",po::value<string>(), "file containing filepaths for BAMs to be jointly analysed (not possible for --analysis==indels)");
+
+
+	po::options_description regioninput("[Required for analysis == getCIGARindels]: \nRegion to be considered for extraction of candidate indels.");
+	regioninput.add_options()
+	("region", po::value<string>(),"region to be analysed in format start-end, eg. 1000-2000")
+	("tid", po::value<string>(),"target sequence (eg 'X') ");
+
+	po::options_description varfileinput("[Required for analysis == indels]");
+	varfileinput.add_options()
+	("varFile", po::value<string>(), "file with candidate variants to be tested.")
+	("varFileIsOneBased", "coordinates in varFile are one-based");
+
+	po::options_description output_options("Output options");
+	output_options.add_options()
+	("outputRealignedBAM", "output BAM file with realigned reads")
+	("processRealignedBAM", po::value<string>(),"ABSOLUTE path to script to process realigned BAM file")
+	//("outputGLF", "outputGLF for individuals in each bam file")
+	("quiet", "quiet output");
+	//("printCallsOnly", "print only genotypes where call_lik_ref>0.0001 (only affects --single)");
+
+	po::options_description single_analysis("parameters for analysis==indels option");
+	single_analysis.add_options()
+	("doDiploid", "analyze data assuming a diploid sequence")
+	("doPooled", "estimate haplotype frequencies using Bayesian EM algorithm.\nMay be applied to single individual and pools.");
+
+	po::options_description analysis_opt("General algorithm parameters");
+	analysis_opt.add_options()
+	//("mapUnmapped", "remap unmapped reads for which mate is mapped")
+	("faster","use faster but less accurate ungapped read-haplotype alignment model")
+	("filterHaplotypes","prefilter haplotypes based on coverage")
+	("flankRefSeq",po::value<int>()->default_value(2),"#bases of reference sequence of indel region")
+	("flankMaxMismatch",po::value<int>()->default_value(2),"max number of mismatches in indel region")
+	("priorSNP", po::value<double>()->default_value(1.0/1000.0), "prior probability of a SNP site")
+	("priorIndel", po::value<double>()->default_value(1.0/10000.0), "prior probability of a detected indel not being a sequencing error")
+	("width", po::value<uint32_t>()->default_value(60), "number of bases to left and right of indel")
+	("maxHap", po::value<uint32_t>()->default_value(8), "maximum number of haplotypes in likelihood computation")
+	("maxRead", po::value<uint32_t>()->default_value(10000), "maximum number of reads in likelihood computation")
+	("mapQualThreshold", po::value<double>()->default_value(0.99), "lower limit for read mapping quality")
+	("capMapQualThreshold", po::value<double>()->default_value(100.0), "upper limit for read mapping quality in observationmodel_old (phred units)")
+	("capMapQualFast", po::value<double>()->default_value(45.0), "cap mapping quality in alignment using fast ungapped method\n (WARNING: setting it too high (>50) might result in significant overcalling!)")
+	("skipMaxHap", po::value<uint32_t>()->default_value(200), "skip computation if number of haplotypes exceeds this number")
+	//("glfNumHap", po::value<uint32_t>()->default_value(5), "number of haplotypes per glf-class")
+	//("numOutputTopHap", po::value<int>()->default_value(5), "number of haplotype pairs output to haplotype file")
+	("minReadOverlap", po::value<uint32_t>()->default_value(20),"minimum overlap between read and haplotype")
+	("maxReadLength", po::value<uint32_t>()->default_value(500),"maximum length of reads")
+	("minCount", po::value<uint32_t>()->default_value(1), "minimum number of WS observations of indel")
+	("maxHapReadProd",po::value<uint32_t>()->default_value(10000000), "skip if product of number of reads and haplotypes exceeds this value")
+	("changeINStoN", "change sequence of inserted sequence to 'N', so that no penalty is incurred if a read mismatches the inserted sequence");
+	po::options_description pooled_analysis("parameters for --pooled option");
+	pooled_analysis.add_options()
+	("bayesa0", po::value<double>()->default_value(0.001), "Dirichlet a0 parameter haplotype frequency prior")
+	("bayesType",po::value<string>()->default_value("singlevariant"), "Bayesian EM program type (all or singlevariant or priorpersite)");
+
+
+	po::options_description option_filter("General algorithm filtering options");
+	option_filter.add_options()
+	("checkAllCIGARs",po::value<int>()->default_value(1),"include all indels at the position of the call site")
+	("filterReadAux", po::value<string>(), "match string for exclusion of reads based on auxiliary information");
+
+
+	po::options_description obsModel("Observation model parameters");
+	obsModel.add_options()
+	("pError", po::value<double>()->default_value(5e-4), "probability of a read indel")
+	//("modelType", po::value<string>()->default_value("probabilistic"), "probabilistic/threshold")
+	("pMut", po::value<double>()->default_value(1e-5), "probability of a mutation in the read")
+	("maxLengthIndel", po::value<int>()->default_value(5), "maximum length of a _sequencing error_ indel in read [not for --faster option]");
+	//("pFirstgLO",po::value<double>()->default_value(0.01),"probability of transition from off the haplotype to on the haplotype");
+
+	po::options_description libParams("Library options");
+	libParams.add_options()
+	("libFile", po::value<string>(), "file with library insert histograms (as generated by --analysis getCIGARindels)");
+
+
+	po::options_description miscAnalysis("Misc results analysis options");
+	miscAnalysis.add_options()
+	("compareReadHap",  "compare likelihood differences in reads against haplotypes")
+	("compareReadHapThreshold", po::value<double>()->default_value(0.5), "difference threshold for viewing")
+	("showEmpirical", "show empirical distribution over nucleotides")
+	("showCandHap", "show candidate haplotypes for fast method")
+	("showHapAlignments","show for each haplotype which reads map to it")
+	("showReads","show reads")
+	("inferenceMethod",po::value<string>()->default_value("empirical"), "inference method")
+	("opl","output likelihoods for every read and haplotype");
+
+	required.add(which).add(baminput).add(regioninput).add(varfileinput).add(output_options).add(single_analysis).add(analysis_opt).add(pooled_analysis).add(option_filter).add(obsModel).add(libParams).add(miscAnalysis);
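+	// Illustrative invocations built from the options declared above (the binary name and all
+	// file names are placeholders, not part of this source):
+	//   ./dindel --analysis getCIGARindels --bamFile sample.bam --ref ref.fa --outputFile out
+	//   ./dindel --analysis indels --bamFile sample.bam --ref ref.fa --varFile candidates.txt \
+	//            --outputFile out.indels --doDiploid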
+
+	po::variables_map vm;
+
+	try {
+			po::store(po::parse_command_line(argc, argv, required), vm);
+	} catch (const boost::program_options::error &) {
+			cout << "Error parsing input options. Usage: \n\n" << required <<"\n";
+			exit(1);
+	}
+	po::notify(vm);
+
+	// analysis
+	if (!(vm.count("analysis"))) {
+		cerr << "Error: Specify which analysis (--analysis) is required." << endl;
+		exit(1);
+	}
+
+	 // required
+	if (!(vm.count("ref") && vm.count("outputFile"))) {
+		cerr << "Error: One of the following required options was not specified: --ref or --outputFile" << endl;
+		exit(1);
+	}
+
+	if (vm["analysis"].as<string>()=="getCIGARindels" && vm.count("region") && !vm.count("tid")) {
+		cerr << "--tid must be specified if analysis==getCIGARindels and --region option is used. " << endl;
+		exit(1);
+	}
+//#define DEBUGGING
+#ifndef DEBUGGING
+	try {
+#endif
+		// extract required parameters
+		string file;
+		int multipleFiles=0;
+		string analysis=vm["analysis"].as<string>();
+
+		// baminput
+		if (analysis=="indels" || analysis=="getCIGARindels") {
+			if (!(vm.count("bamFile") || vm.count("bamFiles"))) {
+					cerr << "Error: Specify either --bamFile or --bamFiles." << endl;
+					exit(1);
+			}
+
+			if (vm.count("bamFile")) {
+			  file=vm["bamFile"].as< string >();
+			  cout << "Reading BAM file: " << file << endl;
+			} else if (vm.count("bamFiles")) {
+			  file=vm["bamFiles"].as<string>();
+			  multipleFiles=1;
+			}
+		}
+
+		string faFile=vm["ref"].as<string>();
+		string outputFile=vm["outputFile"].as< string >();
+
+		string modelType="probabilistic"; //vm["modelType"].as< string >();
+		DetInDel::Parameters params(string("1"), outputFile, modelType);
+		getParameters(vm, params);
+
+		if (analysis=="getCIGARindels") {
+			GetCandidatesFromCIGAR gcfc;
+			string outputFile=vm["outputFile"].as< string >();// outputFile.append(".variants.txt");
+			fasta::Fasta fa(faFile);
+			if (vm.count("region")) {
+				string tid=vm["tid"].as<string>();
+				string region=vm["region"].as<string>();
+				int start, end;
+				parseRegionString(region, start, end);
+				DetInDel detInDel(file, params, multipleFiles);
+				const vector<MyBam *> &  bams = detInDel.getMyBams();
+
+				cout << "Getting indels from CIGARs in mapped reads from region " << tid << ":" << start << "-" << end << endl;
+				gcfc.getIndelFromCIGARRegion(bams,tid, start, end, outputFile, fa);
+
+			} else {
+				if (multipleFiles) {
+					cerr << "Can extract the full set of indels from only one BAM file at a time." << endl;
+					exit(1);
+				}
+				gcfc.get(file, outputFile, faFile);
+			}
+		} else if (analysis=="indels") {
+			if (!vm.count("varFile")) {
+				cerr << "Please specify the file with the candidate variants." << endl;
+				exit(1);
+			}
+
+			string varFile = vm["varFile"].as<string>();
+
+			DetInDel detInDel(file, params, multipleFiles);
+
+			if (vm.count("libFile")) {
+				cout << "Detected library file..." << endl;
+				detInDel.params.mapUnmappedReads=true;
+				detInDel.params.obsParams.mapUnmappedReads=true;
+				detInDel.addLibrary(vm["libFile"].as<string>());
+			}
+			detInDel.params.print();
+
+
+			detInDel.detectIndels(varFile);
+		} else if (analysis == "realignCandidates") {
+			GetCandidatesFromCIGAR gcfc;
+			string outputFile=vm["outputFile"].as< string >(); outputFile.append(".variants.txt");
+
+			if (!vm.count("varFile")) {
+				cerr << "Please specify the file with the candidate variants." << endl;
+				exit(1);
+			}
+
+			string varFile = vm["varFile"].as<string>();
+
+			if (varFile == outputFile) {
+				cerr << "outputFile is same as variant file used for input!" << endl;
+				exit(1);
+			}
+
+			gcfc.realignCandidateFile(varFile, params.varFileIsOneBased,outputFile, faFile);
+		} else {
+			cerr << "Unrecognized --analysis option." << endl;
+			exit(1);
+		}
+#ifndef DEBUGGING
+   }
+   catch (string s) {
+	cout << "Exception: " << s << endl;
+    exit(1);
+   }
+#endif
+   return 0;
+}
+#endif
+
diff --git a/DInDel.hpp b/DInDel.hpp
new file mode 100644
index 0000000..3c60a33
--- /dev/null
+++ b/DInDel.hpp
@@ -0,0 +1,397 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef DINDEL_HPP_
+#define DINDEL_HPP_
+#include <stdlib.h>
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <boost/tuple/tuple.hpp>
+#include <ext/hash_map>
+
+#include "MyBam.hpp"
+#include "faidx.h"
+#include "Haplotype.hpp"
+#include "ObservationModel.hpp"
+#include "HaplotypeDistribution.hpp"
+#include "ObservationModelFB.hpp"
+//#include "Fast.hpp"
+#include "MLAlignment.hpp"
+#include "Read.hpp"
+#include "StringHash.hpp"
+
+#include "OutputData.hpp"
+#include "Library.hpp"
+#include "VariantFile.hpp"
+
+const int SHIFTSTRAND = 1000000; // used to keep track of forward and reverse matches in ::filterHaplotypes
+
+using namespace std;
+using namespace boost;
+using __gnu_cxx::hash;
+typedef struct
+{
+	double pOff, pOn;
+} HapReadLik;
+
+
+
+
+
+class VariantCoverage {
+public:
+	VariantCoverage()
+	{
+		nf=0;
+		nr=0;
+	}
+	VariantCoverage(int _nf, int _nr) 
+	{
+		nf = _nf;
+		nr = _nr;
+	}
+	int nf, nr; // forward and reverse
+};
+
+class DetInDel
+{
+public:
+	//DetInDel(const string & bfName, const string & tid, const string &outputFileName, const string & modelType) : params(tid, outputFileName, modelType) { fai=NULL; };
+	static int fetchFuncFindInDel(const bam1_t *b, void *data);
+	void findInDels(uint32_t start, uint32_t end, bool report);
+	void detectIndels(const string & variantsFileName);
+	void callVariants(const string & variantsFile);
+	void findInDelsPositionsFile(const string & fileName);
+	string getRefSeq(uint32_t lpos, uint32_t rpos);
+	void empiricalDistributionMethod(int index, const vector<Read> & reads, uint32_t pos, uint32_t leftPos, uint32_t rightPos, const AlignedCandidates & candidateVariants, OutputData & oData, OutputData & glfData);
+	void fastMethod(const vector<Read> & reads, uint32_t pos, uint32_t leftPos, uint32_t rightPos, ostream & output);
+	bool getHaplotypes(vector<Haplotype> & haps, const vector<Read> & reads, uint32_t pos, uint32_t & leftPos, uint32_t & rightPos, const AlignedCandidates & candidateVariants);
+	const vector<MyBam *> & getMyBams() const { return myBams; }
+	class HapPairLik {
+	public:
+		double ll;
+		int h1, h2;
+		int numIndFirst;
+		int numIndSecond;
+		int numFirst, numSecond; // number of reads mapped to first and second
+		int numOffBoth; // number of reads that do not map to either haplotype
+		double numOffBothError;
+		map<int, VariantCoverage> hapIndelCoverage1, hapSNPCoverage1,hapIndelCoverage2, hapSNPCoverage2; // indels and snps in the _haplotype_ covered by the read
+
+		operator double() const { return ll;};
+	};
+
+	class HapEstResult {
+		public:
+			HapEstResult();
+			HapEstResult(const AlignedVariant & _av, int _pos, double _prob, double _freq, int _nrf, int _nrr) {
+				av=_av;
+				pos=_pos;
+				prob=_prob;
+				freq=_freq;
+				nrf=_nrf;
+				nrr=_nrr;
+			};
+			AlignedVariant av;
+			int pos;
+			double prob;
+			double freq;
+			int nrf; // number of reads on forward strand
+			int nrr; // number of reads on reverse strand
+	};
+
+	void addLibrary ( const string & name, const Library & lib)
+	{
+		libraries[name.c_str()]=lib;
+	}
+	void addLibrary ( const string & fileName)
+	{
+		libraries.addFromFile(fileName);
+	}
+
+
+protected:
+	void outputHapsAndFreqs(ostream *output, const string & prefix, const vector<Haplotype> & haps, const vector<double> & freqs, uint32_t leftPos);
+	//void getReads(uint32_t leftPos, uint32_t rightPos, vector<Read> & reads);
+	void getReads(uint32_t leftPos, uint32_t rightPos, vector<Read> & reads, uint32_t & oldLeftPos, uint32_t  & oldRightFetchReadPos, vector<Read *> & readBuffer, bool reset);
+
+	double getMaxHap(Haplotype & h1, Haplotype &h2, HapPairLik & hpl, const vector<Haplotype> & haps, vector<HapPairLik> & likPairs);
+	void outputMaxHap(ostream *output, const string & prefix, const vector<Haplotype> & haps, vector<HapPairLik> & likPairs);
+	void outputTopHaps(ostream *output, const string & prefix, const vector<Haplotype> & haps, vector<HapPairLik> & likPairs, int n);
+	bool alignHaplotypes(vector<Haplotype> & haps, uint32_t pos, uint32_t & leftPos, uint32_t & rightPos,  map<int, set<AlignedVariant> > & variants);
+	bool generateHaplotypes(vector<Haplotype> & haps, uint32_t pos, uint32_t & leftPos, uint32_t & rightPos,  const map<int, set<Variant> > & variants);
+	double getHaplotypePrior(const Haplotype & h1, const Haplotype & h2, int leftPos, const AlignedCandidates & candidateVariants);
+	void computeLikelihoods(const vector<Haplotype> &haps, const vector<Read> & reads, vector<vector<MLAlignment> > & liks, uint32_t leftPos, uint32_t rightPos, vector<int> & onHap);
+
+	void computeHapPosition(const Haplotype & hap, const Read & read, vector<int> & alPos, int leftPos);
+	void computeLikelihoodsFaster(const vector<Haplotype> &haps, const vector<Read> & reads, vector<vector<MLAlignment> > & liks, uint32_t leftPos, uint32_t rightPos, vector<int> & onHap);
+
+	void computePairLikelihoods(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<HapPairLik> & likPairs, bool usePrior, const AlignedCandidates & candidateVariants, int leftPos);
+	void statisticsHaplotypePair(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, HapPairLik & hpl,OutputData::Line & line);
+
+	void estimateHaplotypeFrequencies(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<double> & hapFreqs);
+	void estimateHaplotypeFrequenciesPosterior(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<double> & hapFreqs, map <int, vector<tuple<AlignedVariant, double,double> > > & posteriors, uint32_t pos, uint32_t leftPos, ostream & glfOutput);
+	void estimateHaplotypeFrequenciesBayesEM(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<double> & hapFreqs, vector <HapEstResult > & posteriors,  uint32_t candPos, uint32_t leftPos,   uint32_t rightPos, OutputData & glfData, int index, const AlignedCandidates & candidateVariants,string program);
+	void diploidGLF(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, vector<double> & hapFreqs, vector <HapEstResult > & posteriors,  uint32_t candPos, uint32_t leftPos,  uint32_t rightPos, OutputData & glfData, int index, const AlignedCandidates & candidateVariants, string program);
+
+
+	void debug(const pair<Haplotype, Haplotype> & hp, const vector<Read> & reads,  uint32_t leftPos, uint32_t rightPos);
+	void debug(const pair<Haplotype, Haplotype> & hp1, const pair<Haplotype, Haplotype> & hp2, const vector<Read> & reads,  uint32_t leftPos, uint32_t rightPos);
+	void analyzeDifference(const pair<Haplotype, Haplotype> & hp1, const vector<Read> & reads,  uint32_t leftPos, uint32_t rightPos);
+	void showAlignments(const pair<Haplotype, Haplotype> & hp1, const vector<Read> & reads,  uint32_t leftPos, uint32_t rightPos);
+	void showAlignmentsPerHaplotype(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks, uint32_t candPos, uint32_t leftPos);
+
+	double getPairPrior(const AlignedVariant & av1, const AlignedVariant & av2, int leftPos,const AlignedCandidates & candidateVariants);
+
+	void filterHaplotypes(const vector<Haplotype> & haps, const vector<Read> & reads, const vector<vector<MLAlignment> > & liks,  vector<int> & filtered,  map<pair<int, AlignedVariant>, VariantCoverage> & varCoverage, bool doFilter);
+
+	//MyBam myBam;
+
+	//MyBam myBam;
+	vector<MyBam *> myBams;
+	vector<string> myBamsFileNames;
+	LibraryCollection libraries;
+
+
+	class CIGAR : public vector<pair<int,int> >
+	{
+	public:
+		typedef pair<int,int> CIGOp;
+		int refPos;
+	};
+	CIGAR getCIGAR(const Haplotype & hap, const Read & read, const MLAlignment & ml, int refSeqStart);
+	void writeRealignedBAMFile(const string & fileName, const vector<CIGAR> & cigars, const vector<Read> & reads, const vector<int> & onHap, const bam_header_t *bh);
+	void writeUnalignedBAMFile(const string & fileName, const vector<Read> & reads, const vector<int> & onHap, const bam_header_t *bh);
+	class InDel {
+	public:
+		InDel()
+		{
+			count[0]=0;
+			count[1]=0;
+		}
+		typedef enum { In, Del} Type;
+		Type type;
+		size_t count[2];
+	};
+
+public:
+	class Parameters {
+	public:
+		Parameters(const string & _tid, string _fileName, const string & modelType) : obsParams(modelType)
+		{
+			tid=_tid;
+			fileName=_fileName;
+			setDefaultValues();
+		}
+		void setDefaultValues()
+		{
+			bayesa0=0.001;
+			width=30;
+			maxHap=100;
+			skipMaxHap=1000;
+			maxReads=500;
+			mapQualThreshold=0.995;
+			glfNumHap=5;
+			inferenceMethod="empirical";
+			minReadOverlap=5;
+			minCount=2;
+			maxReadLength=40;
+			numOutputTopHap=5;
+			checkAllCIGARs=1;
+			bayesType="all";
+
+			fastWidth=4;
+			analyzeLowFreq=false;
+			analyzeLowFreqDiffThreshold=1.0;
+			showHapDist=true;
+			showHapAlignments=false;
+			showCandHap=false;
+			showReads=false;
+			fastWidthOverlap=4;
+			noIndelWindow=-1;
+			mapUnmappedReads=false;
+			priorIndel=1.0/10000;
+			priorSNP=1.0/1000.0;
+			filterReadAux=string("");
+			quiet=true;
+			computeML=false;
+			computeMAP=false;
+			doDiploid=false;
+			slower=true;
+			estimateHapFreqs=false;
+			printCallsOnly=true;
+			outputPooledLikelihoods=false;
+			filterHaplotypes = false;
+
+			outputGLF=true;
+			outputRealignedBAM=false;
+			processRealignedBAM="no";
+			changeINStoN = false;
+
+
+			EMtol=1e-4;
+		}
+		OutputData makeOutputData(ostream & out)
+		{
+			OutputData oData(out);
+			oData("msg")("index");
+			oData("analysis_type");
+			oData("tid")("lpos")("rpos")("center_position")("realigned_position");
+			oData("ref_all")("num_reads")("num_hqreads");
+			oData("post_prob_variant")("est_freq")("was_candidate_in_window");
+
+			oData("num_mapped_to_first")("num_mapped_to_second");
+			oData("num_off_hap")("loglik_hap_pair")("loglik_next_hap_pair");
+			oData("first_var_cover_forward")("first_var_cover_reverse")("second_var_cover_forward")("second_var_cover_reverse");
+			oData("first_called_all")("second_called_all")("loglik_called_genotype")("loglik_ref_ref")("alt_genotypes");
+			return oData;
+		}
+
+		OutputData makeGLFOutputData(ostream & out)
+		{
+			OutputData oData(out);
+			oData("msg")("index");
+			oData("analysis_type");
+			oData("tid")("lpos")("rpos")("center_position")("realigned_position")("was_candidate_in_window");
+			oData("ref_all")("nref_all")("num_reads");
+			oData("post_prob_variant")("qual")("est_freq")("logZ")("hapfreqs");
+
+			oData("indidx")("msq")("numOffAll")("num_indel")("num_cover_forward")("num_cover_reverse")("num_unmapped_realigned");
+			oData("var_coverage_forward")("var_coverage_reverse");
+			oData("nBQT")("nmmBQT")("mLogBQ")("nMMLeft")("nMMRight");
+			oData("glf");
+			return oData;
+		}
+
+		OutputData makeGLFv2OutputData(ostream & out)
+		{
+			OutputData oData(out);
+			oData("msg")("index");
+			oData("analysis_type");
+			oData("tid")("candidate_position")("realigned_position");
+			oData("ref_all")("nref_all")("num_reads");
+			oData("post_prob_variant")("est_freq");
+
+			oData("indidx")("msq")("num_cover_forward")("num_cover_reverse");
+			oData("glf");
+			return oData;
+		}
+
+
+		void print()
+		{
+			cout << "DetInDel parameters: " << endl;
+			cout << "\ttid: " << tid << " width: " << width << " maxHap: " << maxHap << " maxReads: " << maxReads << " skipMaxHap: " << skipMaxHap << endl;
+			cout << "\toutputFilename: " << fileName << endl;
+			cout << "\tmapQualThreshold: " << mapQualThreshold << endl;
+			//cout << "\tscaleError: " << scaleErr << endl;
+			cout << "\tinferenceMethod: " << inferenceMethod << endl;
+			//cout << "\tglfNumHap: " << glfNumHap << endl;
+			cout << "\tanalyzeLowFreq: " << analyzeLowFreq << endl;
+			cout << "\tanalyzeLowFreqDiffThreshold: " << analyzeLowFreqDiffThreshold << endl;
+			cout << "\tshowHapDist: " << showHapDist << endl;
+			cout << "\tminReadOverlap: " << minReadOverlap << endl;
+			cout << "\tmaxReadLength: " << maxReadLength << endl;
+			//cout << "\tminCount: " << minCount << endl;
+			cout << "\tmaxHapReadProd: " << maxHapReadProd << endl;
+			//cout << "\tfastWidth: " << fastWidth << endl;
+			//cout << "\tfastWidthOverlap: " << fastWidthOverlap << endl;
+			cout << "\tshowCandHap: " << showCandHap << endl;
+			cout << "\tshowReads: " << showReads << endl;
+			cout << "\tfilterHaplotypes: " << filterHaplotypes << endl;
+			cout << "\tnoIndelWindow: " << noIndelWindow << endl;
+			cout << "\tmapUnmappedReads: " << mapUnmappedReads << endl;
+
+			cout << "\tnumOutputTopHap: " << numOutputTopHap << endl;
+
+			cout << "\tcheckAllCIGARs: " << checkAllCIGARs << endl;
+			cout << "\tchangeINStoN: " << changeINStoN << endl;
+			
+
+
+			
+			cout << endl;
+			cout << "\tquiet: " << quiet << endl;
+			cout << "\tprintCallsOnly: " << printCallsOnly << endl;
+			cout << "\tfaster: " << !slower << endl;
+			cout << "\tdoDiploid: " << doDiploid << endl;
+			cout << "\tdoEM: " << estimateHapFreqs << endl;
+
+			cout << "\toutputPooledLikelihoods: " << outputPooledLikelihoods << endl;
+			cout << "\toutputRealignedBAM: " << outputRealignedBAM << endl;
+			cout << "\tprocessRealignedBAM: " << processRealignedBAM << endl;
+			cout << "\tshowHapAlignments: " << showHapAlignments << endl;
+
+			cout << "\tEM tol: " << EMtol << endl;
+			cout << "\tbayesEM a0: " << bayesa0 << endl;
+			cout << "\tbayesType: " << bayesType << endl;
+
+
+			cout << "\tpriorIndel: " << priorIndel << endl;
+			cout << "\tpriorSNP: " << priorSNP << endl;
+
+			//cout << "\tmeanInsert: " << meanInsert << endl;
+			//cout << "\tstdInsert: " << stdInsert << endl;
+
+			cout << "\tfilterReadAux: " << filterReadAux << endl;
+
+			cout << "Observation model parameters: " << endl;
+			obsParams.print();
+		}
+		int noIndelWindow, numOutputTopHap, checkAllCIGARs, minReadOverlap, maxHapReadProd;
+		uint32_t width, maxHap, maxReads, skipMaxHap, glfNumHap,  maxReadLength, minCount, fastWidth, fastWidthOverlap;
+		double checkBaseQualThreshold;
+		double mapQualThreshold, scaleErr, priorIndel, priorSNP, EMtol, bayesa0;
+		string fileName, inferenceMethod, refFileName, tid, filterReadAux, bayesType, processRealignedBAM;
+		bool analyzeLowFreq, showHapDist, showCandHap, showReads, showHapAlignments, alignAgainstReference, mapUnmappedReads, quiet, estimateHapFreqs, doDiploid, computeML, computeMAP, slower,printCallsOnly, outputPooledLikelihoods, filterHaplotypes;
+		bool outputRealignedBAM, outputGLF, varFileIsOneBased, changeINStoN;
+		double analyzeLowFreqDiffThreshold;
+		double meanInsert, stdInsert;
+		ObservationModelParameters obsParams;
+	};
+	Parameters params;
+
+	DetInDel(const string & bfName, const Parameters & _params, int multipleFiles);
+	~DetInDel();
+
+
+	map<uint32_t, InDel> indels;
+	class ScanStats
+	{
+		public:
+		ScanStats()
+		{
+			numUnmappedMate=0;
+		}
+		int numUnmappedMate;
+	};
+	ScanStats scanStats;
+protected:
+	faidx_t *fai;
+
+};
+
+
+
+class FFData
+{
+public:
+	uint32_t start, end;
+	DetInDel *det;
+	map<string, int> unmappedMate;
+	map<int, int > insHisto, delHisto;
+};
+
+#endif /*DINDEL_HPP_*/
diff --git a/Fasta.hpp b/Fasta.hpp
new file mode 100644
index 0000000..d2bdf40
--- /dev/null
+++ b/Fasta.hpp
@@ -0,0 +1,72 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Fasta.hpp
+ *
+ *  Created on: May 27, 2009
+ *      Author: caa
+ */
+
+#ifndef FASTA_HPP_
+#define FASTA_HPP_
+
+#include <string>
+#include "bam.h"
+#include "faidx.h"
+
+
+namespace fasta { 
+using namespace std;
+class Fasta {
+public:
+	Fasta()
+	{
+		fai=NULL;
+	}
+	Fasta(const string & fileName)
+	{
+		fai=NULL;
+		fai = fai_load(fileName.c_str());
+		if (!fai) {
+			throw string("Fasta: cannot open reference file.");
+		}
+	}
+
+	string getSequence(const string & tid, int start, int end)
+	{
+		char *str;
+		char *ref;
+		str = (char*)calloc(strlen(tid.c_str()) + 30, 1);
+		sprintf(str, "%s:%d-%d", tid.c_str(), start, end);
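+		// region string in the samtools faidx format "tid:start-end"; fai_fetch interprets
+		// start and end as 1-based, inclusive coordinates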
+		int len;
+		ref = fai_fetch(fai, str, &len);
+		if (ref==NULL || len==0) { free(str); if (ref) free(ref); throw string("faidx error, len==0"); }
+
+		string result(ref);
+		transform(result.begin(), result.end(), result.begin(), ::toupper);
+		free(str);
+		free(ref);
+		return result;
+	}
+	~Fasta()
+	{
+		if (fai) fai_destroy(fai);
+	}
+
+protected:
+	faidx_t *fai;
+};
+}
+#endif /* FASTA_HPP_ */
diff --git a/Faster.cpp b/Faster.cpp
new file mode 100644
index 0000000..7ed09d5
--- /dev/null
+++ b/Faster.cpp
@@ -0,0 +1,785 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Fast.cpp
+ *
+ *  Created on: Feb 25, 2009
+ *      Author: caa
+ */
+
+#include <string>
+#include <assert.h>
+#include <iostream>
+#include <stdint.h>
+#include <vector>
+#include <list>
+#include <set>
+#include <string>
+#include <map>
+#include <cmath>
+#include <sstream>
+#include <algorithm>
+#include "bam.h"
+#include "Haplotype.hpp"
+#include "Faster.hpp"
+#include "Utils.hpp"
+#include "foreach.hpp"
+using namespace std;
+const int DEBUGS=0;
+
+ObservationModelS::ObservationModelS(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & _params) : params(_params)
+{
+
+	hap_ptr = &_hap;
+	read_ptr = &r;
+	if (params.maxLengthIndel>(int) hap_ptr->size()) throw string("hapSize error.");
+	hlen=(int) hap_ptr->seq.size();
+	rlen=(int) read_ptr->size();
+
+	this->hapStart=hapStart;
+
+	likelihoodComputed=false;
+	bMidError=true;
+	computeBMid();
+	setupReadLikelihoods();
+
+}
+
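+// computeBMid picks bMid, the index of the read base that lies roughly in the middle of the
+// overlap between the mapped read and the haplotype; it is clamped to [0, read.size()-1], and
+// bMidError stays true when the read and the haplotype do not overlap at all.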
+void ObservationModelS::computeBMid()
+{
+	const Read & read = *read_ptr;
+	const Haplotype & hap = *hap_ptr;
+	uint32_t hapEnd=hapStart+hap.size();
+	uint32_t mReadStart=uint32_t(read.posStat.first);
+	uint32_t readEnd=mReadStart+uint32_t(read.size())-1;
+	uint32_t olStart, olEnd;
+	int mid;
+
+	bMidError=true;
+	if (mReadStart>hapEnd) {
+		bMid=0;
+	} else if (readEnd<hapStart) {
+		bMid=int(read.size())-1;
+	} else {
+		olStart=(hapStart>mReadStart)?hapStart:mReadStart;
+		olEnd=(hapEnd>readEnd)?readEnd:hapEnd;
+		mid=(int(olEnd)-int(olStart))/2+int(olStart);
+		bMid=mid-int(mReadStart);
+		bMidError=false;
+	}
+
+	if (bMid<0) { bMid=0; };
+	if (bMid>=int(read.size())) { bMid=int(read.size())-1; };
+
+	if (DEBUGS) cout << "bMid: " << bMid << endl;
+
+}
+
+
+void ObservationModelS::setupReadLikelihoods()
+{
+	const Read & read = *read_ptr;
+
+	logMatch.resize(read.size());
+	logMismatch.resize(read.size());
+	cumLogMatch.resize(read.size());
+	// initialize with prior
+	llMatch=0.0;
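+	// Per-base emission terms read off the loop below: with rq the base quality taken as the
+	// probability that the base was read correctly and pMut the read mutation rate,
+	//   P(match)    = 0.25 + 0.75*rq*(1-pMut)
+	//   P(mismatch) = the remaining probability mass (plus a tiny epsilon for numerical safety);
+	// llMatch accumulates the log-likelihood of a read matching the haplotype at every base.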
+	if (params.modelType=="probabilistic") {
+		for (size_t r=0;r<read.size();r++) {
+			double rq=read.qual[r];
+			double pr=rq*(1.0-params.pMut);
+			double eq=log(.25+.75*pr);
+			double uq=log(.75+1e-10-.75*pr);
+			logMatch[r]=eq;
+			logMismatch[r]=uq;
+			llMatch+=eq;
+			cumLogMatch[r]=llMatch;
+		}
+	} else {
+		throw string("Model not implemented.");
+	}
+
+
+	double mq=1.0-read.mapQual;
+	if (-10.0*log10(mq)>params.capMapQualFast) {
+		mq=pow(10.0,-params.capMapQualFast/10.0);
+	}
+
+	pOffFirst=mq;
+	pOffFirstHMQ=1e-10;
+
+	llOff=log(pOffFirst)+llMatch+double(rlen)*log(1.0-params.pError);
+	llOffHMQ=log(pOffFirstHMQ)+llMatch+double(rlen)*log(1.0-params.pError);
+
+}
+
+
+
+void ObservationModelS::AlignHash(const HapHash & hash)
+{
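+	// Seeding strategy read off the code below: every k-mer of the read is looked up in the
+	// haplotype hash and votes for a relative read-vs-haplotype offset (hp - x); the offsets
+	// are ranked by vote count, the top maxRelPos (15) are kept, and the sparse-state HMM in
+	// SStateHMM is run over just those candidate offsets.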
+
+	const Read & read = *read_ptr;
+	hash_map<int,int> hposFreq; // will keep track of frequencies of relative positions of read wrt haplotype
+	hash_map<int,int>::iterator it;
+
+	unsigned int kmer = hash.getKmer();
+
+	size_t x=0, xl=read.size()-kmer;
+
+	unsigned int key=hash.convert(read.seq.seq,x);
+	for (;x<xl+1;x++) {
+		//const set<int> & hpSet=hash.lookup(read.seq.seq,x);
+		const set<int> & hpSet = hash.lookup(key);
+		if (DEBUGS) cout << "hash: " << x << " :";
+		BOOST_FOREACH(int hp, hpSet) {
+
+			int rpfb=hp-x; // relative position of first base wrt haplotype
+			if (DEBUGS) cout << " " << rpfb;
+			it=hposFreq.find(rpfb);
+			// todo weight according to bMid?
+			if (it==hposFreq.end()) hposFreq[rpfb]=1; else it->second++;
+		}
+		if (DEBUGS) cout << endl;
+		if (x!=xl) key = hash.pushBack(key, read.seq.seq[x+kmer]);
+	}
+
+	// sort according to frequency
+	map<int, set<int> > freqToPos;
+	for (it=hposFreq.begin();it!=hposFreq.end();it++) {
+
+		if (DEBUGS) cout << "il : " << it->first << " " << it->second << endl;
+		freqToPos[it->second].insert(it->first);
+	}
+	// do alignment with top 15 frequency hash lookups
+
+	const int maxRelPos=15;
+
+	vector<int> relPos; relPos.reserve(maxRelPos);
+
+	int tot=0;
+
+	for (map<int,set<int> >::reverse_iterator rit=freqToPos.rbegin(); rit!=freqToPos.rend() ;rit++) {
+		BOOST_FOREACH(int rp, rit->second) {
+			if (tot<maxRelPos) {
+				relPos.push_back(rp);
+				if (DEBUGS) cout << "rp: " << rp << " freq: " << rit->first << endl;
+				tot++;
+			} else goto _end;
+		}
+	}
+	_end:
+
+	if (DEBUGS) cout << "done"<<endl;
+	// run HMM with sparse set of positions
+	SStateHMM(relPos);
+
+}
+
+MLAlignment ObservationModelS::align(const HapHash & hash)
+{
+	AlignHash(hash);
+	likelihoodComputed=true;
+	reportVariants();
+	return ml;
+}
+/*
+inline void ObservationModelS::doTransition(int cr, int nr, const vector<int> & state, vector<double> & alpha, vector<double> & bt, const vector<double> & tr, const int & S)
+{
+	int r=cr;
+	if (state[cr]==-1) {
+		// current readbase not fixed
+		if (state[nr]==-1) {
+			// next base is not fixed
+			for (int cs=0;cs<S;cs++) {
+				for (int ns=0;ns<S;ns++) {
+					double nv=obs[r*S+cs]+alpha[r*S+cs]+tr[cs*S+ns];
+					if (nv>alpha[cr*S+ns]+EPS) { alpha[cr*S+ns]=nv; bt[cr*S+ns]=ns; }
+				}
+			}
+
+		} else {
+			// next base is fixed
+			for (int cs=0;cs<S;cs++) {
+				ns=state[nr];
+				double nv=obs[r*S+cs]+alpha[r*S+cs]+tr[cs*S+ns];
+				if (nv>alpha[cr*S+ns]+EPS) { alpha[cr*S+ns]=nv; bt[cr*S+ns]=ns; }
+			}
+		}
+	} else {
+		// current readbase is fixed
+		if (state[nr]==-1) {
+			// next base is not fixed
+			int cs=state[r];
+			for (int ns=0;ns<S;ns++) {
+				double nv=obs[r*S+cs]+alpha[r*S+cs]+tr[cs*S+ns];
+				if (nv>alpha[cr*S+ns]+EPS) { alpha[cr*S+ns]=nv; bt[cr*S+ns]=ns; }
+			}
+		} else {
+			// next base is fixed
+			int cs=state[r];
+			int ns=state[nr];
+			double nv=obs[r*S+cs]+alpha[r*S+cs]+tr[cs*S+ns];
+			if (nv>alpha[cr*S+ns]+EPS) { alpha[cr*S+ns]=nv; bt[cr*S+ns]=ns; }
+		}
+	}
+}
+
+inline void ObservationModelS::doTransitionNF(int cr, int nr, const vector<int> & state, vector<double> & alpha, vector<double> & bt, const vector<double> & tr, const int & S)
+{
+	int r=cr;
+	// next base is not fixed
+	for (int cs=0;cs<S;cs++) {
+		for (int ns=0;ns<S;ns++) {
+			double nv=obs[r*S+cs]+alpha[r*S+cs]+tr[cs*S+ns];
+			if (nv>alpha[cr*S+ns]+EPS) { alpha[cr*S+ns]=nv; bt[cr*S+ns]=ns; }
+		}
+	}
+}
+*/
+
+void ObservationModelS::SStateHMM(vector<int> & relPos)
+{
+	// note that this HMM does not keep track of the last base before the insertion, so after the insertion it may not transition to the next haplotype base
+	// also, the length of the insertion must be representable as a difference between two of the positions in the relPos vector.
+
+	// p1 and p2 are relative positions of the first read base with respect to the haplotype
+	if (DEBUGS) cout << "hlen: " << hlen << " rlen: " << rlen << endl;
+	const double EPS=1e-7;
+	int readLen=read_ptr->size();
+
+
+	relPos.push_back(-readLen);
+	std::sort(relPos.begin(), relPos.end());
+
+	mapState=vector<int>(readLen,0);
+
+	//if (DEBUGS){ cout << "relPos: "; for (int x=0;x<relPos.size();x++) cout << " " << relPos[x]; cout << endl; }
+
+
+	int S=relPos.size();
+	int T=2*S;            // total number of states per slice
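+	// State layout per read base: states 0..S-1 mean "aligned on diagonal relPos[s]", and states
+	// S..2*S-1 are the corresponding insertion states (read base not present in the haplotype).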
+
+	// note that obs will encode observation potentials only for the non-inserted states
+	vector<double> tr(S*S, -1000.0), trI(S*S, -1000.0), alpha(readLen*T,-1000.0), obs(readLen*S,0);
+
+	// NOTE alpha[r] is the message that read base r sends to its neighbour; whether that neighbour is r+1 or r-1 depends on which side of bMid the base lies
+
+	vector<int> bt(readLen*T,0); // backtracking matrix for Viterbi
+
+	// setup state array
+	// initialize to all undetermined
+	vector<int> state(readLen,-1);
+
+	// initialize obs_lik (log-emission-probabilities) for every read-base
+
+	for (int r=0;r<readLen;r++) {
+		for (int s=0;s<S;s++) {
+			int p1=relPos[s];
+			if (p1+r>=0 && p1+r<hlen) {
+				obs[r*S+s]=(read_ptr->seq.seq[r]==hap_ptr->seq[p1+r])?logMatch[r]:logMismatch[r];
+			} else {
+				// this corresponds to LO/RO in ObservationModelFB
+				obs[r*S+s]=logMatch[r];
+			}
+		}
+
+		// obs[r*S+S-1]=logMatch[r]; // assume match if insertion
+		if (DEBUGS) { cout << "obs: "; for (int s=0;s<S;s++) cout << " " << -int(round(obs[r*S+s])); cout << endl; }
+	}
+
+
+
+
+	// todo : add code to fix state to OffHaplotype if to the left or right of a fixed base?
+
+
+	// setup transition-matrix
+
+	vector<double> prior(T, -1000.0), priorHMQ(T, -1000.0);
+
+ 	// p1 <- p1
+	// p1 <- p2
+	// p1 <- I
+
+	// p2 <- p1
+	// p2 <- p2
+	// p2 <- I
+
+	// I <- p1
+	// I <- p2
+	// I <- I
+
+
+	// setup prior distribution for bMid
+	for (int ins=0;ins<2;ins++) {
+		double pins=(ins==0)?log(1.0-params.pError):log(params.pError);
+		for (int y=0;y<S;y++) {
+			int x=y+ins*S;
+			int hp=relPos[y]+bMid;
+			if (hp>=0 && hp<hlen) {
+				prior[x]=log(1.0-pOffFirst)+pins;
+				priorHMQ[x]=log(1.0-pOffFirstHMQ)+pins;
+			} else {
+				prior[x]=log(pOffFirst)+pins;
+				priorHMQ[x]=log(pOffFirstHMQ)+pins;
+			}
+			if (DEBUGS) cout << "prior[" << x << "]: " << prior[x] << " " << priorHMQ[x] << endl;
+		}
+	}
+
+	double logpInsgNoIns = log(params.pError);
+	double logpInsgIns = -0.25;
+	double logpNoInsgIns = log(1-exp(logpInsgIns));
+	//double logpNoInsgNoIns = log(1.0-params.pError);
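+	// Insertion transition parameters: an insertion is opened with probability pError and extended
+	// with probability exp(logpInsgIns) (about 0.78 for the hard-coded -0.25), giving a geometric
+	// prior on insertion length.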
+
+
+
+	// transitions between relPos
+	for (int s1=0;s1<S;s1++) for (int s2=0;s2<S;s2++) {
+		double ll=-1000.0;
+		// relpos to relpos
+		// for non-inserted states only deletions are allowed.
+		// you can only transition to a lower relPos from an insertion-state (ie x>=S)
+		if (s1!=s2) {
+			double d=fabs(double(relPos[s1]-relPos[s2]));
+			ll=(d-1.0)*logpInsgIns+log(params.pError);
+			trI[s1*S+s2]=(d-1.0)*logpInsgIns;
+		} else if (s1==s2) {
+			ll=log(1.0-params.pError);
+		}
+
+		// Pr[s1 | s2 ]
+		tr[s1*S+s2]=ll;
+	}
+
+	if (DEBUGS) for (int s1=0;s1<S;s1++) {
+		cout << "tr["<< s1 << "]: "; for (int s2=0;s2<S;s2++) cout << " " << tr[s1*S+s2]; cout << endl;
+	}
+	// from left to bMid
+
+	for (int r=0;r<bMid;r++) {
+		int cr=r;
+		//doTransition(cr, nr, state, alpha, bt, tr);
+
+		for (int cs=0;cs<S;cs++) {
+			double pv=obs[r*S+cs]; if (r) pv+=alpha[(r-1)*T+cs];
+
+			// transition to non-inserted from non-inserted
+			for (int ns=cs;ns<S;ns++) {
+				double nv=pv+tr[cs*S+ns];
+				if (nv>alpha[cr*T+ns]+EPS) { alpha[cr*T+ns]=nv; bt[cr*T+ns]=cs; }
+			}
+
+			// r          <---  r+1
+			// to non-ins from ins
+			int ns=cs+S;
+			double nv=pv+logpNoInsgIns;
+			if (nv>alpha[cr*T+ns]+EPS) { alpha[cr*T+ns]=nv; bt[cr*T+ns]=cs; }
+
+
+			// insertion states
+
+			// r          <---  r+1
+			// ins        <---  ins
+
+			int ics=cs+S;
+			ns=ics;
+			nv=logMatch[r]+logpInsgIns; if (r) nv += alpha[(r-1)*T+ics];
+			if (nv>alpha[cr*T+ns]+EPS) { alpha[cr*T+ns]=nv; bt[cr*T+ns]=ics; }
+
+
+			// ins       <---   noins
+			ics=cs+S;		  //  must transition to a lower relPos in case of insertion and going from left to right
+			for (int ns=0;ns<cs;ns++) if (relPos[cs]-r>=relPos[ns]) {
+				nv=logMatch[r]+trI[cs*S+ns]+logpInsgNoIns; if (r) nv += alpha[(r-1)*T+ics];
+				if (nv>alpha[cr*T+ns]+EPS) { alpha[cr*T+ns]=nv; bt[cr*T+ns]=ics; }
+			}
+
+
+		}
+
+		if (DEBUGS) { cout << "alpha_fw: "; for (int x=0;x<T;x++) cout << " " << alpha[r*T+x]; cout << endl; } // index with T states per slice, as in the backward pass
+	}
+
+	if (DEBUGS) cout << endl;
+
+	// from right to bMid
+
+	for (int r=readLen-1;r>bMid;r--) {
+		int cr=r;
+		//doTransition(cr, nr, state, alpha, bt, tr);
+
+		for (int cs=0;cs<S;cs++) {
+			double pv=obs[r*S+cs]; if (r<readLen-1) pv+=alpha[(r+1)*T+cs];
+
+			// transition to non-inserted from non-inserted
+			for (int ns=0;ns<=cs;ns++) {
+				double nv=pv+tr[cs*S+ns];
+				if (nv>alpha[cr*T+ns]+EPS) { alpha[cr*T+ns]=nv; bt[cr*T+ns]=cs; }
+			}
+
+			// r          <---  r-1
+			// to ins     from no-ins
+			double nv=logMatch[r]+logpInsgNoIns; if (r<readLen-1) nv += alpha[(r+1)*T+cs+S];
+
+			if (nv>alpha[cr*T+cs]+EPS) { alpha[cr*T+cs]=nv; bt[cr*T+cs]=cs+S; }
+
+			int ns;
+
+			// insertion states
+
+			// r          <---  r-1
+			// ins        <---  ins
+
+			int ics=cs+S;
+			ns=ics;
+			nv=logMatch[r]+logpInsgIns; if (r<readLen-1) nv+= alpha[(r+1)*T+ics];
+			if (nv>alpha[cr*T+ns]+EPS) { alpha[cr*T+ns]=nv; bt[cr*T+ns]=ics; }
+
+			// r	       <---   r-1
+			// noins       <---   ins
+			ics=cs+S;		  // in this right-to-left pass the insertion must transition to a higher relPos
+			for (int ns=cs+1;ns<S;ns++) if (relPos[cs]>relPos[ns]-r) {
+				nv=obs[r*S+cs]+logpNoInsgIns+trI[cs*S+ns]; if (r<readLen-1) nv +=  alpha[(r+1)*T+cs];
+				if (nv>alpha[cr*T+ns+S]+EPS) { alpha[cr*T+ns+S]=nv; bt[cr*T+ns+S]=cs; }
+			}
+
+
+		}
+			// r               r-1
+			// ins     <-----  noins
+
+		if (DEBUGS) { cout << "alpha_bw: "; for (int x=0;x<T;x++) cout << " " << alpha[r*T+x]; cout << endl; }
+	}
+
+
+	double max=-HUGE_VAL;
+	int xmax=0;
+
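+	// Combine the left-to-bMid and right-to-bMid Viterbi messages with the position prior at the
+	// anchor base bMid; the maximizing state gives the most likely placement of the read.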
+	for (int ins=0;ins<2;ins++)
+	for (int y=0;y<S;y++) {
+		int x=ins*S+y;
+		double obsv=(ins==0)?obs[bMid*S+y]:logMatch[bMid];
+		alpha[bMid*T+x]=obsv+prior[x];
+		if (bMid<readLen-1) alpha[bMid*T+x]+=alpha[(bMid+1)*T+x];
+		if (bMid>0) alpha[bMid*T+x]+=alpha[(bMid-1)*T+x];
+
+		if (alpha[bMid*T+x]>max) {
+			max=alpha[bMid*T+x];
+			xmax=x;
+		}
+	}
+
+	if (DEBUGS) { cout << "alpha_bmid: "; for (int x=0;x<T;x++) cout << " " << alpha[bMid*T+x]; cout << endl; }
+
+	// check position of bMid on haplotype
+
+	int hp=relPos[xmax%S]+bMid;
+	if (hp>=0 && hp<hlen) {
+		// the anchor base bMid maps onto the haplotype
+		ml.offHap=false;
+	} else {
+		// the anchor base falls outside the haplotype
+		ml.offHap=true;
+	}
+
+
+	ml.ll=max;
+
+	max=-HUGE_VAL;
+	xmax=0;
+
+	if (DEBUGS) cout << "alpha_bmid_HMQ: ";
+	for (int ins=0;ins<2;ins++)
+	for (int y=0;y<S;y++) {
+		int x=ins*S+y;
+		double obsv=(ins==0)?obs[bMid*S+y]:logMatch[bMid];
+		double v=obsv+priorHMQ[x];
+		if (bMid<readLen-1) v+=alpha[(bMid+1)*T+x];
+		if (bMid>0) v+=alpha[(bMid-1)*T+x];
+
+		if (v>max) {
+			max=v;
+			xmax=x;
+		}
+		if (DEBUGS) cout << " " << v;
+	}
+	if (DEBUGS) cout << endl;
+
+	hp=relPos[xmax%S]+bMid;
+	if (hp>=0 && hp<hlen) {
+		// under the high-mapping-quality prior the anchor base maps onto the haplotype
+		ml.offHapHMQ=false;
+	} else {
+		// the anchor base falls outside the haplotype
+		ml.offHapHMQ=true;
+	}
+
+	state[bMid]=xmax;
+
+	// backtrack to get the map state
+
+	for (int b=bMid; b>0;b--) {
+		state[b-1]=bt[(b-1)*T+state[b]];
+	}
+
+	for (int b=bMid;b<readLen-1;b++) {
+		state[b+1]=bt[(b+1)*T+state[b]];
+	}
+
+	if (DEBUGS){ cout << "state: "; for (int r=0;r<readLen;r++) cout << "[" << r << " " << read_ptr->seq.seq[r] << " " << state[r] << "]"; cout << endl;}
+
+
+	// convert relative positions to absolute positions, using LO, RO, x convention
+
+	int lhp=1;
+	for (int r=0; r<readLen; r++) {
+		if (state[r]==-1) throw string("error in mapstate fast");
+		if (state[r]<S) {
+			int hp=relPos[state[r]]+r;
+			if (hp>=0 && hp<hlen) {
+				mapState[r]=hp+1;
+				lhp=hp+1;
+			} else if (hp<0) mapState[r]=0; else mapState[r]=hlen+1; // LO (0) and RO (hlen+1), matching the hapSize+2 state convention in reportVariants
+			if (DEBUGS) cout << "ms: " << r << " " << state[r] << " " << hp << endl;
+
+		} else {
+			// insertion
+			mapState[r]=hlen+2+lhp;
+
+		}
+		if (DEBUGS) cout << "ms: " << r << " " << state[r] << " mapstate " << mapState[r] << endl;
+	}
+
+
+}
+
+
+void ObservationModelS::reportVariants()
+{
+	int hapSize=hlen;
+	int readSize=rlen;
+	int numS=hapSize+2;
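+	// mapState decoding: s % numS == 0 means left of the haplotype (LO), values 1..hapSize mean
+	// aligned to haplotype base s-1, any other value below numS is right of the haplotype (RO),
+	// and s >= numS encodes an insertion placed before haplotype base (s % numS).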
+
+	const Read & read = *read_ptr;
+	const Haplotype & hap = *hap_ptr;
+
+
+	ml.align=string(hapSize, 'R');
+	ml.indels.clear();
+	ml.snps.clear();
+
+	ml.firstBase=-1;
+	ml.lastBase=-1;
+	ml.hapIndelCovered.clear();
+	ml.hapSNPCovered.clear();
+	ml.hpos.clear();
+	ml.hpos.resize(readSize);
+
+
+	int b=0;
+	while (b<readSize) {
+		// only report variants for bases that are on the haplotype
+		int s=mapState[b];
+		if ( (s%numS)>0 && (s%numS)<=hapSize ) {
+			if (s>=numS) { // insertion
+				int pos=(s%numS)-1+1; // position of insertion wrt haplotype MAINTAIN CONVENTION OF INSERTION BEFORE BASE X
+				int len=0; // length of insertion
+				int rpos=b; // start base of insertion in read
+				while (b<readSize && mapState[b]>=numS) {
+					ml.hpos[b]=MLAlignment::INS;
+					b++;
+					len++;
+				}
+				int readStart=rpos;
+				int readEnd=b-1;
+				int hapStart=pos;
+				int hapEnd=pos;
+				string seq=read.seq.seq.substr(rpos,len);
+				ml.indels[pos]=AlignedVariant(string("+").append(seq),  hapStart, hapEnd, readStart, readEnd);
+				b--;
+			} else {
+				ml.hpos[b]=s-1;
+				// update firstBase and lastBase
+				if (ml.firstBase==-1) ml.firstBase=s-1; else if (s-1<ml.firstBase) ml.firstBase=s-1;
+				if (ml.lastBase==-1) ml.lastBase=s-1; else if (s-1>ml.lastBase) ml.lastBase=s-1;
+
+
+				// check for SNP
+				if (read.seq[b]!=hap.seq[s-1]) {
+					string snp;
+					snp+=hap.seq[s-1];
+					snp.append("=>");
+					snp+=read.seq[b];
+					int readStart=b;
+					int readEnd=b;
+					int hapStart=s-1;
+					int hapEnd=s-1;
+
+
+					ml.snps[s-1]=AlignedVariant(snp,hapStart, hapEnd, readStart, readEnd);
+					ml.align[s-1]=read.seq[b];
+				}
+				// check for deletion
+				if (b<readSize-1) {
+					int ns=mapState[b+1];
+					if (ns<numS && ns-s>1) { // make sure next state is not an insertion..
+						int pos=s+1-1;
+						int len=-(ns-s-1);
+						//indels[pos]=ReportVariant(len, hap.seq.substr(pos, -len), b);
+
+						for (int y=pos;y<-len+pos;y++) ml.align[y]='D';
+						int readStart=b;
+						int readEnd=b+1;
+						int hapStart=pos;
+						int hapEnd=pos-len-1;
+						string seq=hap.seq.substr(pos,-len);
+						ml.indels[pos]=AlignedVariant(string("-").append(seq), hapStart, hapEnd, readStart, readEnd);
+					}
+				}
+
+			}
+
+		} else { // read base falls off the haplotype (LO/RO)
+			if (s%numS==0) ml.hpos[b]=MLAlignment::LO; else ml.hpos[b]=MLAlignment::RO;
+
+		}
+		b++;
+	}
+
+	for (map<int,AlignedVariant>::const_iterator it=hap.indels.begin();it!=hap.indels.end();it++) {
+		const AlignedVariant & av=it->second;
+		if (av.isCovered(params.padCover, ml.firstBase, ml.lastBase)) ml.hapIndelCovered[it->first]=true; else ml.hapIndelCovered[it->first]=false;
+	}
+	for (map<int,AlignedVariant>::const_iterator it=hap.snps.begin();it!=hap.snps.end();it++) {
+		const AlignedVariant & av=it->second;
+		if (av.isCovered(params.padCover, ml.firstBase, ml.lastBase)) ml.hapSNPCovered[it->first]=true; else ml.hapSNPCovered[it->first]=false;
+	}
+
+
+}
+
+void ObservationModelS::printAlignment(size_t hapScrPos)
+{
+	// count how many bases in the read are left of the haplotype
+	if (!likelihoodComputed) throw string("Must align() first!");
+	int hapSize=hlen;
+	int readSize=rlen;
+	int numS=hapSize+2;
+
+	const Read & read = *read_ptr;
+	const Haplotype & hap = *hap_ptr;
+
+
+	string leftHap, rightHap;
+	string rhap(hap.size(),' ');
+	string ins;
+
+	bool insact=false;
+	int b=0;
+	while (b<readSize) {
+		// only report variants for bases that are on the haplotype
+		int s=mapState[b];
+		char nuc=read.seq.seq[b];
+		if (s%numS==0) {
+			//
+			leftHap+=nuc;
+		} else if ( (s%numS)>0 && (s%numS)<=hapSize ) {
+			if (s>=numS) { // insertion
+				if (!insact) {
+					insact=true;
+					ins+='[';
+					stringstream os; os << (s%numS);
+					ins.append(os.str());
+					ins+=' ';
+				}
+
+				ins+=nuc;
+
+			} else {
+				if (insact) ins+=']';
+				insact=false;
+				rhap[s-1]=nuc;
+
+				if (b<readSize-1) {
+					int ns=mapState[b+1];
+					if (ns<numS && ns-s>1) {
+						int len=ns-s-1;
+						rhap.replace(s, len, string(len,'_'));
+					}
+
+
+				}
+
+
+			}
+
+		} else {
+			rightHap+=nuc;
+		}
+		b++;
+	}
+	if (insact) ins+=']';
+
+	stringstream os;
+	os << readSize << " " << ml.offHap << " " << ml.indels.size() << " " << ml.firstBase << " " << ml.lastBase << " " << ml.ll << " ";
+	for (map<int,AlignedVariant>::const_iterator it=hap.indels.begin();it!=hap.indels.end();it++) {
+		if  (ml.hapIndelCovered[it->first]) os << "1 "; else os << "0 ";
+	}
+	string prefix=os.str();
+
+	int leftHapSpace=int(hapScrPos)-int(prefix.size());
+	if (leftHapSpace<0) leftHapSpace=0;
+
+	string prLeftHap=string(leftHapSpace,' ');
+
+	if (int(leftHap.size())>leftHapSpace) {
+		prLeftHap=leftHap.substr(leftHap.size()-leftHapSpace, leftHapSpace);
+	} else if (leftHap.size()>0) {
+		prLeftHap.replace(leftHapSpace-leftHap.size(), leftHap.size(), leftHap);
+	}
+
+	cout << prefix<<prLeftHap<<rhap<<rightHap << " " << ins << " read: " << read.seq.seq << endl;
+
+
+	for (map<int,AlignedVariant>::const_iterator it=hap.indels.begin();it!=hap.indels.end();it++) {
+		cout << " " << it->first;
+	}
+	cout << endl;
+
+	cout << endl;
+
+	for (int x=0;x<readSize;x++) {
+		cout << "[" << x << ":" << ml.hpos[x] << "]";
+	}
+	cout << endl;
+}
+
+ObservationModelS::~ObservationModelS()
+{
+
+
+}
+
+
diff --git a/Faster.hpp b/Faster.hpp
new file mode 100644
index 0000000..c6913b9
--- /dev/null
+++ b/Faster.hpp
@@ -0,0 +1,101 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Faster.hpp
+ *
+ *  Created on: Feb 24, 2009
+ *      Author: caa
+ */
+
+#ifndef FASTER_HPP_
+#define FASTER_HPP_
+#include <string>
+#include <assert.h>
+#include <iostream>
+#include <stdint.h>
+#include <vector>
+#include <list>
+#include <set>
+#include <string>
+#include <map>
+#include <cmath>
+#include "bam.h"
+#include "Haplotype.hpp"
+#include "Read.hpp"
+#include "MLAlignment.hpp"
+#include "ObservationModel.hpp"
+using namespace std;
+
+class ObservationModelS
+{
+
+
+protected:
+	class UngappedAlignment
+	{
+	public:
+		UngappedAlignment()
+		{
+			ll=-HUGE_VAL;
+			relPos=-10000;
+			numMismatch=10000;
+		}
+		UngappedAlignment(double _ll, int _relPos, int _numMismatch)
+		{
+			ll=_ll;
+			relPos=_relPos;
+			numMismatch=_numMismatch;
+		}
+		double ll;
+		int relPos;
+		int numMismatch;
+	};
+public:
+	ObservationModelS() {};
+	ObservationModelS(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & _params);
+	virtual ~ObservationModelS();
+	MLAlignment align(const HapHash & hash);
+
+	//MLAlignment calcLikelihood();
+	//double getLogLikelihood() { calcLikelihood(); return ml.ll; };
+	// void changeHaplotype(const Haplotype & newHap);
+	void printAlignment(size_t hapScrPos);
+	void printStatistics();
+	ObservationModelParameters params;
+
+protected:
+	void computeBMid();
+	void setupReadLikelihoods();
+	void Align();
+	void reportVariants();
+	inline void doTransition(int cs, int nr, const vector<int> & state, vector<double> & alpha, vector<double> & bt, const vector<double> & tr, const int & S);
+	inline void doTransitionNF(int cs, int nr, const vector<int> & state, vector<double> & alpha, vector<double> & bt, const vector<double> & tr, const int & S);
+	void SStateHMM(vector<int> & relPos);
+	void AlignHash(const HapHash & hash);
+	MLAlignment ml;
+	vector<double> logMatch, cumLogMatch, logMismatch;
+	vector<int> mapState;
+	double llMatch; //log likelihood when all bases in the read match
+	int bMid, hlen, rlen;
+	double llOff, llOffHMQ, pOffFirst, pOffFirstHMQ;
+
+	const Haplotype *hap_ptr;
+	const Read *read_ptr;
+	size_t hapStart;
+	bool  likelihoodComputed, bMidError;
+};
+
+
+#endif /* FASTER_HPP_ */
diff --git a/GetCandidates.cpp b/GetCandidates.cpp
new file mode 100644
index 0000000..14a0da6
--- /dev/null
+++ b/GetCandidates.cpp
@@ -0,0 +1,498 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * GetCandidates.cpp
+ *
+ *  Created on: Aug 27, 2009
+ *      Author: caa
+ */
+
+#include <fstream>
+#include <string>
+#include "MyBam.hpp"
+#include "foreach.hpp"
+#include "ObservationModelSeqAn.hpp"
+#include "GetCandidates.hpp"
+#include "Variant.hpp"
+#include "VariantFile.hpp"
+#include "bam.h"
+#include "sam.h"
+#include "Fasta.hpp"
+#include "StringHash.hpp"
+#include <set>
+#include "foreach.hpp"
+using namespace std;
+
+int GetCandidatesFromCIGAR::getIndelFromCIGARFetchFunc(const bam1_t *b, void *data)
+{
+	CFFData & dat = *( (CFFData *) data);
+	vector<CIGARindel>  indels;
+	HMap::iterator it;
+	getIndelFromCIGAR(b, indels);
+	BOOST_FOREACH(CIGARindel id, indels) {
+		it = dat.hmap.find(id.refpos);
+		if (it==dat.hmap.end()) dat.hmap[id.refpos][id]=1; else (it->second)[id]++;
+	}
+	return 0;
+}
+
+void GetCandidatesFromCIGAR::getIndelFromCIGARRegion(const vector<MyBam *> & myBams, const string & tid, int start, int end, const string & outputFileName, fasta::Fasta & fa)
+{
+	CFFData data;
+
+	for (size_t b=0;b<myBams.size();b++) {
+		bam_fetch(myBams[b]->bf, myBams[b]->idx, myBams[b]->getTID(tid), start, end, &data, &GetCandidatesFromCIGAR::getIndelFromCIGARFetchFunc);
+	}
+
+	ofstream ofile(outputFileName.c_str());
+	if (!ofile.is_open()) throw string("Cannot open variants file ").append(outputFileName).append(" for writing.");
+	outputIndels(tid, data.hmap,ofile,fa,1);
+	ofile.close();
+}
+
+void GetCandidatesFromCIGAR::getIndelFromCIGAR(const bam1_t *b, vector<CIGARindel> & indels)
+{
+	const bam1_core_t *c=&b->core;
+	uint32_t *cigar=bam1_cigar(b);
+	uint32_t k, l=0;
+	uint32_t refPos = c->pos;
+	int lastop=-1;
+	uint32_t lastPos=refPos;
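+	// Walk the CIGAR: inserted bases are collected from the read sequence, deletions are recorded
+	// as runs of 'D', and refPos is advanced for operations that consume the reference (M/D/N).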
+	for (k = 0; k < c->n_cigar; ++k) {
+		// cout << "cigar #" << k << endl;
+		int op = cigar[k] & BAM_CIGAR_MASK;
+		int32_t len=cigar[k] >> BAM_CIGAR_SHIFT;
+		string seq;
+
+		if (op==BAM_CINS || op==BAM_CMATCH || op==BAM_CSOFT_CLIP) {
+			for(int32_t x=0;x<len;x++) {
+				if (op==BAM_CINS) {
+					seq+=( bam_nt16_rev_table[ bam1_seqi(bam1_seq(b), l) ] );
+				}
+				l++;
+			}
+		} else if (op==BAM_CDEL) {
+			seq.insert(0, len, 'D');
+		}
+
+		if (op==BAM_CINS || op==BAM_CDEL) {
+			int ilen=len; if (op==BAM_CDEL) ilen=-ilen;
+			indels.push_back(CIGARindel(refPos, ilen, seq));
+		}
+
+		// update position for the next cigar
+		lastPos=refPos;
+		if (op == BAM_CMATCH || op == BAM_CDEL || op==BAM_CREF_SKIP) {
+			refPos+=(uint32_t) len;
+		} else if (op!=BAM_CINS && op != BAM_CSOFT_CLIP && op != BAM_CHARD_CLIP) throw string("I don't know how to smoke this CIGAR");
+		lastop=op;
+	}
+}
+
+vector<AlignedVariant> GetCandidatesFromCIGAR::alignCIGAR(const string & tid, const CIGARindel & id, fasta::Fasta & fa)
+{
+
+	vector<AlignedVariant> variants;
+
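+	// The candidate indel is spliced into a window of reference sequence around its position and
+	// the modified sequence is realigned against the reference with SeqAn, so the indel is
+	// reported at the position the aligner assigns rather than at the raw CIGAR position.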
+	ObservationModelParameters alignParams("probabilistic");
+	seqan::Score<int> score(-1, -460, -100,-960);
+
+	Read rh1;
+	rh1.pos=0;
+	rh1.posStat.first=0;
+	rh1.mapQual=1.0-1e-32;
+
+	map<int, AlignedVariant> alIndel, alSNP;
+
+	int width=params.alignWindow;
+
+	if (abs(id.len)>width/3) width=abs(id.len)*3;
+
+	int start=id.refpos-width;
+	int end=id.refpos+width;
+
+	string hap;
+	try {
+		hap=fa.getSequence(tid, start+1, end+1);
+	} catch (string s) {
+		cerr << "error: "<< s << endl;
+		cerr << "start: " << start << " end: " << end << endl;
+		return vector<AlignedVariant>();
+	}
+	//int startRef=start-params.refPad;
+	//int refEnd=end+params.refPad;
+
+	int startRef=start;
+	int refEnd=end;
+
+	string ref;
+	try {
+		ref=fa.getSequence(tid, startRef+1, refEnd+1);
+	} catch (string s) {
+		cerr << "error: "<< s << endl;
+		cerr << "startRef: " << startRef << " refEnd: " << refEnd<< endl;
+		return vector<AlignedVariant>();
+	}
+
+	Haplotype hRef; hRef.append(ref);
+
+
+
+	// create haplotype with indel
+
+
+	int pos=id.refpos-start;
+
+	int testlen = (id.len>0)?0:-id.len;
+	if (hap.size()<pos+testlen) {
+		cerr << "Cannot align variant " << id.refpos << " " << id.len << " " << id.seq << endl;
+		return variants;
+	}
+
+	if (id.len<0) {
+		hap.erase(pos,-id.len);
+	} else if (id.len>0) {
+		hap.insert(pos, id.seq);
+	}
+
+	//cout << "hap: " << hap << endl;
+
+	// align indel
+
+	rh1.seq.seq=hap; // sequence with indel
+	rh1.setAllQual(1.0-1e-16);
+
+	try {
+		ObservationModelSeqAn om(hRef, rh1, 0, alignParams, score);
+		string align;
+		om.align();
+		const MLAlignment & ml=om.getMLAlignment();
+		for(map<int, AlignedVariant>::const_iterator it=ml.indels.begin();it!=ml.indels.end();it++) if (it->second.getType()==AlignedVariant::INS  || it->second.getType()==AlignedVariant::DEL) {
+			const AlignedVariant & aid = it->second;
+			int pos=startRef+it->first;
+			variants.push_back(AlignedVariant(aid.getString(),pos,pos,-1,-1));
+		}
+	} catch (const bad_alloc & ) {
+		cout << "SeqAN Alloc error:  hRef.size(): " << hRef.size() << " rh1.size(): " << rh1.size() << endl;
+		// cout << "hRef: " << hRef << endl;
+		// cout << "rh1: " << rh1 << endl;
+	}
+
+
+	return variants;
+
+}
+
+void GetCandidatesFromCIGAR::outputIndels(const string & tid, const hash_map<int,map<CIGARindel, int> > & hmap, ofstream & ofile, fasta::Fasta & fa, int outputType=1)
+{
+	hash_map<int,map<CIGARindel, int> >::const_iterator it=hmap.begin();
+	hash_map<int,map<AlignedVariant, int> > realigned;
+
+
+	/*
+	ALWAYS realign indel
+	if (fastaName.empty()) {
+		BOOST_FOREACH(CIGARindel id, indels) {
+			it = hmap.find(id.refpos);
+			if (it==hmap.end()) hmap[id.refpos][id.seq]=1; else (it->second)[id.seq]++;
+			stringstream os;
+			os << tid << " " << id.refpos << " " << id.len << " " << id.seq;
+			cout << os.str() << endl;
+		}
+	} else {
+	*/
+		// realign indel
+	for  (it=hmap.begin();it!=hmap.end();it++) {
+		for (map<CIGARindel, int>::const_iterator i2=it->second.begin();i2!=it->second.end();i2++) {
+			const CIGARindel & id=i2->first;
+			//
+			//cout << "Here " << tid << " " << id.refpos << " " << id.len << " " << id.seq <<endl;
+			vector<AlignedVariant> indels;
+			indels=alignCIGAR(tid, id, fa);
+			BOOST_FOREACH(AlignedVariant aid, indels)  if (aid.getType()==AlignedVariant::INS  || aid.getType()==AlignedVariant::DEL) {
+				realigned[aid.getStartHap()][aid]=i2->second;
+			}
+
+		}
+	}
+		
+	std::set<int> positions;
+	for  (hash_map<int,map<AlignedVariant, int> >::const_iterator it=realigned.begin();it!=realigned.end();it++) {
+		positions.insert(it->first);
+	}
+
+
+	//for  (hash_map<int,map<AlignedVariant, int> >::const_iterator it=realigned.begin();it!=realigned.end();it++) {
+	for (std::set<int>::const_iterator posit = positions.begin(); posit != positions.end(); posit++) {
+		const map<AlignedVariant, int>  & _variants = realigned[*posit];
+		ostringstream ovar, ocnt;
+		ovar << tid;
+		ovar << " " << *posit;
+		for (map<AlignedVariant, int>::const_iterator i2=_variants.begin();i2!=_variants.end();i2++) {
+			const AlignedVariant & aid = i2->first;
+			int len=aid.size();
+			if (aid.getType()==AlignedVariant::DEL) len=-len;
+			if (outputType==1) {
+				ovar << " " << aid.getString();
+				ocnt << " " << i2->second;
+			} else if (outputType==2) {
+				ovar << " " << len << " " << aid.getSeq();
+				ocnt << " " << i2->second;
+			} else throw string("Huh?");
+		}
+		ofile << ovar.str() << " #" << ocnt.str() << endl;
+
+	}
+
+}
+
+void GetCandidatesFromCIGAR::realignCandidateFile(const string & _varFile, bool isOneBased, const string & outputFileName, const string & fastaName)
+{
+	hash_map<int,map<CIGARindel, int> > hmap;
+	hash_map<int,map<CIGARindel, int> >::iterator it;
+
+	fasta::Fasta fa(fastaName);
+
+	VariantFile vf(_varFile);
+
+	ofstream ofile(outputFileName.c_str());
+	if (!ofile.is_open()) throw string("Cannot open ").append(outputFileName).append(" for writing CIGAR indels.");
+
+	cout << "Realigning indels from variants file: " << _varFile << endl;
+
+	string ctid="";
+	while (!vf.eof()) {
+			vector<Variant> variants;
+			VariantFile::Candidates cand=vf.getLine(isOneBased);
+			if (cand.variants.empty()) continue;
+
+			if (cand.tid!=ctid) {
+				if (hmap.size()) {
+					outputIndels(ctid, hmap,ofile,fa);
+					cout << "Wrote realigned candidate indel for target " << ctid << " to file " << outputFileName << endl;
+				}
+				hmap.clear();
+				ctid=cand.tid;
+			}
+
+			BOOST_FOREACH(Variant var, cand.variants) if (var.isIndel()) {
+				int len=var.size();
+				if (var.getType()==Variant::DEL) len=-len;
+				CIGARindel id(cand.pos,len, var.getSeq());
+				it = hmap.find(id.refpos);
+				if (it==hmap.end()) hmap[id.refpos][id]=1; else (it->second)[id]++;
+			}
+	}
+
+	outputIndels(ctid, hmap,ofile,fa);
+	cout << "Wrote realigned candidate indels for target " << ctid << " to file " << outputFileName << endl;
+
+
+	ofile.close();
+}
+
+void GetCandidatesFromCIGAR::outputLibraries(LibInsertSize & libInsertSize, const string & outputFile)
+{
+
+	// open file
+	ofstream ofile(outputFile.c_str());
+	if (!ofile.is_open()) throw string("Cannot open ").append(outputFile).append(" for writing libraries.");
+
+	for (LibIterator libit = libInsertSize.begin();libit!=libInsertSize.end();libit++) {
+		string lib = string(libit->first);
+		// compute mean and std
+		InsertSizes & insertSizes = libit->second;
+		InsIterator insit;
+
+		long int tot = 0;
+		double mean = 0.0, std = 0.0;
+
+		std::set<int> isizes;
+
+		for (insit = insertSizes.begin(); insit!=insertSizes.end();insit++) {
+			tot += insit->second;
+			isizes.insert(insit->first);
+		}
+		
+		double cum = 0;
+		int pct = int ( 0.9999 * double(tot));
+		int median = tot/2;
+		int max_isize = -1;
+		int median_isize = -1;
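+		// Estimate the median insert size first; sizes above 10x the median are treated as
+		// outliers and excluded when computing the mean and standard deviation below.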
+		for (std::set<int>::const_iterator it = isizes.begin();it!=isizes.end();it++) {
+			cum += insertSizes[*it];
+			if (median_isize == -1 && cum>median) {
+				median_isize = *it;
+			}
+		}
+		isizes.clear();
+		max_isize = median_isize * 10;
+		//cout << "tot: " << tot << " pct: " << pct << " cum: " << cum << " max_isize: " << max_isize <<  " median: " << median <<  " median_isize: " << median_isize << endl;
+
+
+		double dtot = double(tot);
+		for (insit = insertSizes.begin(); insit!=insertSizes.end();insit++) if (insit->first<max_isize) {
+//			cout << "isize: " << insit->first << " count: " << insit->second << endl;
+			mean += double(insit->first)*double(insit->second)/dtot;
+		}
+		for (insit = insertSizes.begin(); insit!=insertSizes.end();insit++) if (insit->first<max_isize) {
+			double dist = double(insit->first)-mean;
+			std += double(insit->second)/dtot*dist*dist;
+		}
+		cout << "Library: " << lib << " mean: " << mean << " stddev: " << sqrt(std) << endl;
+		// create histogram in vector
+		int len = int(mean+5*sqrt(std));
+		vector<long int> histo(len,2), inthisto(len,2);
+
+		for (insit = insertSizes.begin(); insit!=insertSizes.end();insit++) {
+			int isize = insit->first;
+			if (isize<len) {
+				histo[isize]=insit->second;
+			}
+		}
+
+		// smooth histogram out a little
+		int L = 5;
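+		// Moving-average smoothing over a window of +/- L bins (with a small pseudocount), so the
+		// written histogram has no zero bins.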
+		for (int i=0;i<len;i++) {
+			int min = i-L; if (min<0) min = 0;
+			int max = i+L; if (max>len) max = len;
+			int n = 0;
+			long int sum = 0;
+			for (int j=min;j<max;j++,n++) {
+				sum += histo[j];
+			}
+			inthisto[i] = (sum+1)/(n+1);
+		}
+
+
+		// write histogram to file
+		ofile << "#LIB " << lib << endl;
+		for (int i=0;i<len;i++) {
+			ofile << i << " " << inthisto[i] << endl;
+		}
+	}
+	ofile.close();
+}
+void GetCandidatesFromCIGAR::get(const string & _bamFile, const string & outputFileName, const string & fastaName)
+{
+	// also get histogram
+	LibInsertSize libInsertSize;
+
+	fasta::Fasta fa(fastaName);
+
+	samfile_t *bf;
+	bf=samopen(_bamFile.c_str(), "rb", 0);
+	bam1_t *b=bam_init1();
+
+	string outputFileVariants = outputFileName;
+	string outputFileLibraries = outputFileName;
+	outputFileVariants.append(".variants.txt");
+	outputFileLibraries.append(".libraries.txt");
+
+
+
+
+	ofstream ofile(outputFileVariants.c_str());
+	if (!ofile.is_open()) throw string("Cannot open ").append(outputFileVariants).append(" for writing CIGAR indels.");
+
+	hash_map<int,map<CIGARindel, int> > hmap;
+	hash_map<int,map<CIGARindel, int> >::iterator it;
+
+	int oldtid=-1;
+	string _oldtid="";
+	cout << "Parsing indels from CIGAR strings..." << endl;
+	long int numread = 0;
+
+
+	string defaultlib("dindel_default");
+	while (samread(bf,b)>=0) {
+		
+		int btid = b->core.tid;
+		if (btid<0) continue; // unmapped read
+		const char *tidptr = bf->header->target_name[(b->core).tid];
+		if (!tidptr) continue;
+		string tid=string(tidptr);
+		if ((b->core).tid!=oldtid) {
+			if (oldtid!=-1) {
+				outputIndels(_oldtid, hmap,ofile,fa);
+				cout << "Wrote indels in CIGARS for target " << _oldtid << " to file " << outputFileVariants << endl;
+			}
+			oldtid=(b->core).tid;
+			_oldtid=tid;
+			hmap.clear();
+
+		}
+
+		vector<CIGARindel> indels;
+		getIndelFromCIGAR(b, indels);
+		BOOST_FOREACH(CIGARindel id, indels) {
+			it = hmap.find(id.refpos);
+			if (it==hmap.end()) hmap[id.refpos][id]=1; else (it->second)[id]++;
+		}
+
+		// get insertsize
+		//cout << int(b->core.flag & BAM_FPAIRED) << " " << int (b->core.flag & BAM_FPROPER_PAIR) << " tid: " << int(b->core.tid) << " mtid: " <<  b->core.mtid << " fdup: " << int(b->core.flag & BAM_FDUP) << " fqcfail: " <<  int( b->core.flag & BAM_FQCFAIL) << endl;
+
+		if ((b->core.flag & BAM_FPAIRED) && (b->core.flag & BAM_FPROPER_PAIR) && (b->core.tid == b->core.mtid) && !( (b->core.flag & BAM_FDUP) || (b->core.flag & BAM_FQCFAIL) )) {
+			const char *p = bam_get_library((bam_header_t *) bf->header, b);
+
+			string lib = defaultlib; // copy by value: assigning a library name below must not overwrite the default
+			if (p) lib = string(p);
+
+			//cout << "lib: " << lib << endl;
+
+			int isize = abs(b->core.isize);
+
+			LibIterator lit = libInsertSize.find(lib);
+			if (lit == libInsertSize.end()) {
+				libInsertSize[lib]=hash_map<int, long int>();
+				lit = libInsertSize.find(lib);
+			}
+
+			InsIterator iit = lit->second.find(isize);
+			if (iit == lit->second.end()) {
+				(lit->second)[isize]=1;
+			} else {
+				iit->second += 1;
+			}
+
+		}
+		numread++;
+		if (numread % 1000000==999999) {
+			cout << "Number of reads read: " << numread+1 << endl;
+		}
+	}
+	outputIndels(_oldtid, hmap,ofile,fa);
+	outputLibraries(libInsertSize, outputFileLibraries);
+
+	cout << "Wrote indels in CIGARS for target " << _oldtid << " to file " << outputFileVariants << endl;
+	cout << "Wrote library insert sizes to " << outputFileLibraries << endl;
+	cout << "done!" << endl;
+
+	bam_destroy1(b);
+	samclose(bf);
+
+	ofile.close();
+}
+
+GetCandidatesFromCIGAR::GetCandidatesFromCIGAR()
+{
+
+}
+
+
+GetCandidatesFromCIGAR::~GetCandidatesFromCIGAR()
+{
+
+}
diff --git a/GetCandidates.hpp b/GetCandidates.hpp
new file mode 100644
index 0000000..ba609bc
--- /dev/null
+++ b/GetCandidates.hpp
@@ -0,0 +1,107 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * GetCandidates.hpp
+ *
+ *  Created on: Aug 27, 2009
+ *      Author: caa
+ */
+
+#ifndef GETCANDIDATES_HPP_
+#define GETCANDIDATES_HPP_
+#include <map>
+#include <ext/hash_map>
+#include <vector>
+#include "MyBam.hpp"
+#include "Fasta.hpp"
+#include "Variant.hpp"
+using __gnu_cxx::hash;
+namespace std { using namespace __gnu_cxx; }
+// generic class for generating candidates
+class GetCandidates
+{
+public:
+	GetCandidates() {};
+	GetCandidates(const string & bamFile);
+
+	vector<AlignedVariant> get(const string tid, uint32_t start, uint32_t end);
+	void get(const string & outputFileName); // outputs directly to filename of the whole BAMfile
+	void outputToFile(const string & fileName);
+protected:
+	map<string, vector<AlignedVariant> > candidates; // candidates for every chromosome/reference sequence
+	MyBam bam;
+	virtual ~GetCandidates() {};
+};
+
+class GetCandidatesFromCIGAR : public GetCandidates
+{
+public:
+	class Params
+	{
+	public:
+		Params() {
+			alignWindow=100;
+			refPad=10;
+		}
+		int alignWindow, refPad;
+	} params;
+protected:
+	class CIGARindel
+	{
+	public:
+		CIGARindel(const uint32_t _refpos, int _len, const string _seq)
+		{
+			refpos=_refpos;
+			len=_len;
+			seq=_seq;
+		}
+		bool operator<(const CIGARindel & c) const
+		{
+			if (refpos==c.refpos) { if (seq!=c.seq) return seq<c.seq; else return len<c.len; } else return refpos<c.refpos;
+		}
+		uint32_t refpos;
+		int len;
+		string seq;
+	};
+	typedef hash_map<int, map<CIGARindel, int> > HMap;
+
+	class CFFData
+	{
+	public:
+		 HMap hmap;
+	};
+
+public:
+	GetCandidatesFromCIGAR();
+	static int getIndelFromCIGARFetchFunc(const bam1_t *b, void *data);
+	void getIndelFromCIGARRegion(const vector<MyBam *> & myBams, const string & tid, int start, int end, const string & outputFileName, fasta::Fasta & fa);
+	void realignCandidateFile(const string & _varFile, bool isOneBased, const string & outputFileName, const string & fastaName);
+	void get(const string & bamFile, const string & outputFileName);
+	void get(const string & bamFile, const string & outputFileName, const string & fastaName);
+	~GetCandidatesFromCIGAR();
+protected:
+	vector<AlignedVariant> alignCIGAR(const string & tid, const CIGARindel & id, fasta::Fasta & fa);
+	static void getIndelFromCIGAR(const bam1_t *b, vector<CIGARindel> & indels);
+	void outputIndels(const string & tid, const HMap & hmap, ofstream & ofile, fasta::Fasta & fa, int outputType);
+	typedef hash_map<int, long int> InsertSizes;
+	typedef string_hash<InsertSizes> LibInsertSize;
+	typedef LibInsertSize::iterator LibIterator;
+	typedef InsertSizes::iterator InsIterator;
+
+	void outputLibraries(LibInsertSize & libInsertSize, const string & outputFile);
+
+};
+
+#endif /* GETCANDIDATES_HPP_ */
diff --git a/HapBlock.cpp b/HapBlock.cpp
new file mode 100644
index 0000000..ceeb15b
--- /dev/null
+++ b/HapBlock.cpp
@@ -0,0 +1,204 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "HapBlock.hpp"
+#include <sstream>
+#include <iostream>
+using namespace std;
+
+HapBlock::HapBlock(const HapBlock & hb, uint32_t _start, uint32_t _len)
+{
+	assert(hb.end()>=_start+_len-1);
+
+	pos0=_start;
+	pos1=_start+_len-1;
+	if (pos1<pos0) {
+		cout << "SMALLER" << endl;
+	}
+	type = HapBlock::NORMAL;
+	assert(pos1>=pos0);
+	haplotypes.clear();
+	bool found=false;
+	for (map<Haplotype, int>::const_iterator it=hb.haplotypes.begin();it!=hb.haplotypes.end();it++) {
+		Haplotype newHap=Haplotype(it->first, _start-hb.pos0, _len);
+		if (newHap.type==Haplotype::Ref) found=true;
+		map<Haplotype, int>::iterator hit=haplotypes.find(newHap);
+		if (hit==haplotypes.end()) {
+			haplotypes[newHap]=it->second;
+		} else {
+			if (newHap.type==Haplotype::Ref) hit->first.type=Haplotype::Ref;
+			hit->second+=it->second;
+		}
+		//haplotypes[ Haplotype(it->first, _start-hb.pos0, _len) ]+=it->second;
+		// += because subhaplotype may occur multiple times
+	}
+}
+
+void HapBlock::insert(const Haplotype & seq)
+{
+	map<Haplotype, int>::iterator hit=haplotypes.find(seq);
+	if (hit==haplotypes.end()) {
+		haplotypes[seq]=1;
+	} else {
+		if (seq.type==Haplotype::Ref) hit->first.type=Haplotype::Ref;
+		hit->second++;
+	}
+}
+
+HapBlock::HapBlock(const Haplotype & h, uint32_t start)
+{
+	pos0=start;
+	pos1=start+h.size()-1;
+	if (pos1<pos0) {
+		cout << pos0 << " " << pos1 << " " << endl;
+		cout << "h: " << h << endl;
+	}
+	assert(pos1>=pos0);
+	haplotypes[h]=1;
+	type=HapBlock::NORMAL;
+}
+
+void HapBlock::setFrequencies()
+{
+	int sum=0;
+	for (map<Haplotype, int>::iterator it=haplotypes.begin();it!=haplotypes.end();it++) {
+		sum+=it->second;
+	}
+	for (map<Haplotype, int>::iterator it=haplotypes.begin();it!=haplotypes.end();it++) {
+		(it->first).freq=double(it->second)/double(sum);
+	}
+}
+ostream &operator<<(ostream &stream, const HapBlock &hb)
+{
+	// construct matrix
+	vector<string> output(hb.length());
+	vector<int> counts;
+	vector<double> freqs;
+	for (map<Haplotype, int>::const_iterator it=hb.haplotypes.begin();it!=hb.haplotypes.end();it++)
+	{
+		for (size_t y=0;y<hb.length();y++) {
+			if ((it->first).size()>y) output[y]+=((it->first)[y]); else output[y]+='.';
+			output[y]+=' ';
+		}
+		counts.push_back(it->second);
+		freqs.push_back(it->first.freq);
+	}
+
+	stream << "start: " << hb.start() << " end: " << hb.end() << " numHap: " << hb.haplotypes.size() << endl;
+	for (size_t y=0;y<output.size();y++) stream << output[y] << endl;
+	for (size_t y=0;y<counts.size();y++) stream << freqs[y] << " "; stream << endl;
+	for (size_t y=0;y<counts.size();y++) stream << counts[y] << " "; stream << endl;
+	for (map<Haplotype, int>::const_iterator it=hb.haplotypes.begin();it!=hb.haplotypes.end();it++) stream << it->first.type << " ";
+	return stream;
+}
+
+bool HapBlock::hasHaplotype(const Haplotype & seq, uint32_t seqStart)
+{
+	//cout << "hasHaplotype(" << seq << "," << seqStart << "): ";
+	for (map<Haplotype, int>::iterator it=haplotypes.begin();it!=haplotypes.end();it++) {
+		if (it->first.compare(seqStart-start(), seq.size(), seq)==0) { it->second++; /*cout << "true" << endl;*/ return true; };
+	}
+	//cout << "false" << endl;
+	return false;
+}
+
+void HapBlock::showVector(ostream &stream,const vector<HapBlock*> & hapBlocks,uint32_t midPos)
+{
+	size_t nb=hapBlocks.size();
+	vector<size_t> length(nb,0), num(nb,0), pos(nb,0);
+	vector<HapBlock*> hbs(nb);
+	size_t y=0,x=0,c=0;
+	const size_t offset=20;
+	size_t indelPos=0;
+	for (x=0;x<nb;x++) if (hapBlocks[x]!=NULL){
+		pos[c]=offset+y;
+		if (midPos>=hapBlocks[x]->start() && midPos<=hapBlocks[x]->end()) indelPos=pos[c];
+		length[c]=hapBlocks[x]->length();
+		y+=length[c];
+		hbs[c]=hapBlocks[x];
+		num[c]=hbs[c]->size();
+		c++;
+	}
+
+	/*
+	for (map<int, HapBlock *>::const_iterator it=hb.insertions.begin();it!=hb.insertions.end();it++,x++) {
+		pos[c]=y;
+		length[c]=it->second->length();
+		y+=length[c];
+		hbs[c]=it->second;
+		num[c]=hbs[c]->size();
+		c++;
+	}
+	*/
+
+	size_t maxLen=*max_element(num.begin(), num.end());
+	vector<string> lines(maxLen*2+1,string(offset+y,' '));
+
+	lines[1][1]='R'; lines[1][2]='E'; lines[1][3]='F';
+	//for (size_t x=0;x<lines.size();x++) { lines[x][0]='\t'; };
+	for (size_t i=0;i<pos.size();i++) {
+			//cout << "o: " << o << " o.size() : " << o.size() << " pos[i]: " << pos[i] << endl;
+		lines[0][pos[i]]='|';
+		/*
+		size_t j=1;
+		for (map<Haplotype, int>::const_iterator it=hbs[i]->haplotypes.begin();it!=hbs[i]->haplotypes.end();it++) {
+			string u=it->first.seq;
+			//cout << "u: " << u << endl;
+			for (size_t l=0;l<u.size();l++) lines[j][pos[i]+l]=u[l];
+			j++;
+		}
+		j=maxLen+1;
+		for (map<Haplotype, int>::const_iterator it=hbs[i]->haplotypes.begin();it!=hbs[i]->haplotypes.end();it++) {
+			string o;
+			ostringstream os(ostringstream::out);
+			os << int(round(-log(it->first.freq))); o=os.str();
+			for (size_t l=0;l<o.size();l++) lines[j][pos[i]+l]=o[l];
+			j++;
+		}
+		*/
+		// order haplotypes such that reference sequence is top, then sorted based on frequency
+		vector<Haplotype> haps; Haplotype refHap;
+		for (map<Haplotype, int>::const_iterator it=hbs[i]->haplotypes.begin();it!=hbs[i]->haplotypes.end();it++) if (it->first.type!=Haplotype::Ref) haps.push_back(it->first); else refHap=it->first;
+		class SortFunc
+		{
+		public:
+			static bool sortFunc(const Haplotype & h1, const Haplotype & h2)  { return h1.freq<h2.freq; };
+		};
+		sort(haps.begin(),haps.end(), SortFunc::sortFunc);
+		haps.push_back(refHap);
+
+		size_t j=1;
+		for (int k=int(haps.size())-1;k>=0;k--) {
+			string u=haps[k].seq;
+			//cout << "u: " << u << endl;
+			for (size_t l=0;l<u.size();l++) lines[j][pos[i]+l]=u[l];
+			j++;
+		}
+		j=maxLen+1;
+
+		for (int k=int(haps.size())-1;k>=0;k--) {
+			string o;
+			ostringstream os(ostringstream::out);
+			os << int(round(-log(haps[k].freq))); o=os.str();
+			for (size_t l=0;l<o.size();l++) lines[j][pos[i]+l]=o[l];
+			j++;
+		}
+
+
+	}
+	lines[0][indelPos]='X';
+	for (size_t j=0;j<lines.size();j++) {
+		stream << lines[j] << endl;
+	}
+}
diff --git a/HapBlock.hpp b/HapBlock.hpp
new file mode 100644
index 0000000..884bafe
--- /dev/null
+++ b/HapBlock.hpp
@@ -0,0 +1,57 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef HAPBLOCK_HPP_
+#define HAPBLOCK_HPP_
+#include <stdint.h>
+#include <string>
+#include <assert.h>
+#include <iostream>
+#include <map>
+#include <string>
+#include <algorithm>
+#include <vector>
+#include "Haplotype.hpp"
+using namespace std;
+
+class HapBlock
+{
+public:
+	map<Haplotype, int> haplotypes;
+	bool operator<(const HapBlock & hb) const { return pos0<hb.pos0; };
+
+
+	HapBlock(const Haplotype & seq, uint32_t start);
+	HapBlock(const HapBlock & hb, uint32_t _start, uint32_t _len);
+	bool hasHaplotype(const Haplotype & seq, uint32_t seqStart);
+	uint32_t start() const { return pos0; };
+	uint32_t end() const { return pos1; };
+	uint32_t length() const { return end()-start()+1; };
+	size_t size() const { return haplotypes.size(); };
+	void insert(const Haplotype & seq);// { haplotypes[seq]++; }
+	vector<pair<Haplotype,int> > getHaplotypes();
+	void setFrequencies();
+	friend ostream &operator<<(ostream &stream, const HapBlock &hb);
+	static void showVector(ostream &stream,const vector<HapBlock*> & hapBlocks, uint32_t midPos);
+	void setType(int _type) { type=_type; };
+	int getType() const { return type; };
+
+	static const int NORMAL=0;
+	static const int INSERT=1;
+private:
+	uint32_t pos0, pos1;
+	int type;
+};
+
+#endif /*HAPBLOCK_HPP_*/
diff --git a/Haplotype.hpp b/Haplotype.hpp
new file mode 100644
index 0000000..638f0a0
--- /dev/null
+++ b/Haplotype.hpp
@@ -0,0 +1,389 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef HAPLOTYPE_HPP_
+#define HAPLOTYPE_HPP_
+#include <stdint.h>
+#include <string>
+#include <assert.h>
+#include <iostream>
+#include <cmath>
+#include <map>
+#include <vector>
+#include "Variant.hpp"
+#include "MLAlignment.hpp"
+#include "foreach.hpp"
+#include <ext/hash_map>
+#include <set>
+//#include "Fast.hpp"
+using namespace std;
+using __gnu_cxx::hash;
+namespace std { using namespace __gnu_cxx; }
+
+const char NUCLEOTIDES[]={'A','T', 'G','C'};
+
+
+
+
+
+class Haplotype //: public string
+{
+public:
+	// ContainsInDel means the haplotype contains a small non-zero length segment
+	// that was identified as an InDel from Cigar by WH alignment
+	//typedef enum { Normal, In, Del, HasIn, HasDel, HasInDel } Type;
+	typedef int Type;
+	static const int Ref=1;
+	static const int Normal=Ref<<1;
+	static const int In=Ref<<2;
+	static const int Del=Ref<<3;
+
+
+
+	mutable Type type;
+	// nfreq is the product of the frequencies of haplotypes that are not indels
+	mutable double freq, conf, nfreq;
+	uint32_t pos;
+	string seq;
+	string indel; // if haplotype has indel for a given position
+	string align; // annotates, for each base of the sequence this haplotype was aligned to, whether it equals the reference (R), is a SNP (S) or a deletion (D); insertions cannot be recorded this way
+	//vector<pair<string, double> > haps;
+	map<int, AlignedVariant  > indels, snps;
+	MLAlignment ml;
+
+	size_t size() const { return seq.size(); };
+	char & operator[](size_t idx) {  return seq[idx]; };
+	const char & operator[](size_t idx) const {  return seq[idx]; };
+	Haplotype & operator+=(char c) { seq+=c; return *this; };
+	bool operator<(const Haplotype & h) const { return seq<h.seq; };
+
+	/*
+	bool operator<(const Haplotype & h) const
+	{
+		if (seq!=h.seq) {
+			if (type<h.type) return true;
+			else if (type==h.type) return seq<h.seq;
+		} else return seq<h.seq;
+        };
+	*/
+
+
+	int compare ( size_t pos1, size_t n1, const Haplotype & h ) const { return seq.compare(pos1,n1,h.seq); };
+	Haplotype & insert ( size_t pos1, size_t n, char c ) { seq.insert(pos1,n,c); return *this; };
+	void reserve(size_t n) { seq.reserve(n); };
+	Haplotype & append(const string & str) { seq.append(str); return *this; };
+
+	Haplotype(const Haplotype & h, size_t pos0, size_t n)
+	{
+		seq=h.seq.substr(pos0, n);
+		conf=h.conf;
+		freq=h.freq;
+		type=h.type;
+		nfreq=h.nfreq;
+		indel=h.indel;
+		align=h.align;
+		pos=h.pos;
+		snps=h.snps;
+		indels=h.indels;
+		ml=h.ml;
+		//haps=h.haps;
+	};
+	Haplotype()
+	{
+		type=Normal;
+		conf=0.0;
+		freq=0.0;
+		nfreq=0.0;
+		pos=0;
+
+	};
+	Haplotype(Type _type)
+	{
+		type=_type;
+		conf=0.0;
+		freq=0.0;
+		nfreq=0.0;
+		pos=0;
+	}
+	Haplotype(Type _type, const string & _seq )
+	{
+		seq=_seq;
+		type=_type;
+		conf=0.0;
+		freq=0.0;
+		nfreq=0.0;
+		pos=0;
+	}
+	Haplotype(const Haplotype &h)
+	{
+		seq=h.seq;
+		conf=h.conf;
+		freq=h.freq;
+		type=h.type;
+		nfreq=h.nfreq;
+		indel=h.indel;
+		align=h.align;
+		pos=h.pos;
+		snps=h.snps;
+		indels=h.indels;
+		ml=h.ml;
+		//haps=h.haps;
+	}
+
+	Haplotype & operator=(const Haplotype & h)
+	{
+		if (&h!=this) {
+
+			seq=h.seq;
+			conf=h.conf;
+			freq=h.freq;
+			type=h.type;
+			nfreq=h.nfreq;
+			indel=h.indel;
+			pos=h.pos;
+			snps=h.snps;
+			indels=h.indels;
+			align=h.align;
+			ml=h.ml;
+			//haps=h.haps;
+		}
+		return *this;
+	}
+
+	string getIndel(int relPos) const
+	{
+		map<int, AlignedVariant>::const_iterator it=indels.find(relPos);
+		if (it==indels.end()) {
+			char a=align[relPos];
+			if (a=='R') return string("*REF"); else return string("R=>")+=a;
+		} else {
+			const AlignedVariant & av=it->second;
+			//if (av.getType()==Variant::SNP)  throw string("Haplotype::getIndel error");
+			return av.getString();
+		}
+	}
+
+	string getSNP(int relPos) const
+	{
+		map<int, AlignedVariant>::const_iterator it=snps.find(relPos);
+		if (it==snps.end()) {
+			char a=align[relPos];
+			if (a=='R') return string("*REF"); else return string("R=>")+=a;
+		} else {
+			return it->second.getString();
+		}
+	}
+	Haplotype filtered() const
+	{
+		/*
+		Haplotype hap=*this, newhap=*this;
+		newhap.seq.clear();
+		transform(hap.seq.begin(), hap.seq.end(), hap.seq.begin(), ::toupper);
+
+		for (size_t x=0;x<hap.seq.size();x++) {
+			if (hap.seq[x]!='_' && hap.seq[x]!='#') newhap+=hap.seq[x];
+		}
+		*/
+		return *this;
+	}
+
+	void addRefVariant(int rp)
+	{
+		map<int, AlignedVariant>::const_iterator it;
+
+		// first do indels
+		int offset=0;
+		// get base position in haplotype of rp (relative position in reference sequence)
+
+		bool addVariant=true;
+		it=indels.begin();
+		while (it!=indels.end() && it->first<=rp) {
+			if (it->second.getType()==AlignedVariant::DEL){
+				if (it->first+it->second.size()<=rp) {
+					offset-=it->second.size();
+				} else {
+					// deletion deleted rp from reference
+					//addVariant=false;
+					break;
+				}
+			}
+			if (it->second.getType()==AlignedVariant::INS) offset+=it->second.size();
+			it++;
+		}
+
+		if (addVariant) {
+			int readStart=rp+offset;
+			int readEnd=rp+offset;
+			int hapStart=rp;
+			int hapEnd=rp;
+
+			if (indels.find(rp)==indels.end()) {
+				// no indel at position relPos
+				string gt;
+				char a=align[rp];
+				if (a!='R') {
+					gt=string("R=>"); gt+=a;
+				} else gt=string("*REF");
+				indels[rp]=AlignedVariant(gt, hapStart, hapEnd, readStart, readEnd);
+			}
+
+			if (snps.find(rp)==snps.end()) {
+				// no snp at position relPos
+				string gt;
+				char a=align[rp];
+				if (a!='R') {
+					gt=string("R=>"); gt+=a;
+				} else gt=string("*REF");
+				snps[rp]=AlignedVariant(gt, hapStart, hapEnd, readStart, readEnd);
+			}
+		}
+	}
+
+
+	int countIndels() const
+	{
+		int num = 0;
+		for (map<int, AlignedVariant>::const_iterator it = indels.begin();it!=indels.end();it++) {
+			if (it->second.getType() == Variant::INS || it->second.getType() == Variant::DEL) num++;
+		}
+		return num;
+	}
+
+	int countSNPs() const
+	{
+		int num = 0;
+		for (map<int, AlignedVariant>::const_iterator it = snps.begin();it!=snps.end();it++) {
+			if (it->second.getType() == Variant::SNP && !it->second.isRef()) num++;
+		}
+		return num;
+	}
+	/*
+	int getRefPos(int pos) const
+	{
+		// returns position of base in haplotype with respect to reference it was aligned to
+		if (!indels.size()) return pos; else {
+			int offset=0;
+			map<int, AlignedVariant>::const_iterator it=indels.begin();
+			while (it!=indels.end() && pos>it->second.getPos()) {
+				offset-=it->second.length;
+				it++;
+			}
+			return pos+offset;
+		}
+	}
+	*/
+
+	friend ostream &operator<<(ostream &stream, const Haplotype &h)
+	{
+		stream << "type: " << h.type << " seq: " << h.seq << " len: " << h.size() << " nfreq: " << h.nfreq << " freq: " << h.freq << " indel: " << h.indel;
+		return stream;
+	}
+
+	/*
+	void printHaps() const
+	{
+		cout << "freq: " << nfreq << " length: " << seq.size() << endl;
+		for (size_t i=0;i<haps.size();i++) cout << "[" << i << " |" << haps[i].first << "|," << haps[i].second << "]";
+		cout << endl;
+
+		for (size_t i=0;i<haps.size();i++) cout << haps[i].first; cout << endl;
+		for (size_t i=0;i<haps.size();i++) {
+			cout << int(round(-log(haps[i].second)));
+			if (haps[i].first.size()>1) cout << string(haps[i].first.size()-1,' ');
+		}
+		cout << endl;
+		cout << seq << endl;
+
+		cout << endl;
+	}
+	*/
+
+};
+
+
+class HapHash
+{
+public:
+	HapHash(unsigned int _kmer, const Haplotype & hap )
+	{
+		kmer=_kmer;
+		mask=( 1<< (2*kmer) )-1;
+		makeHash(hap);
+	}
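+	// Each k-mer is packed into an unsigned int, 2 bits per base, so kmer must be at most 16.
+	// lookup() returns the set of haplotype offsets at which a k-mer occurs; pushBack() slides
+	// the window one base to the right by dropping the oldest base and adding the new one.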
+	unsigned int getKmer() const { return kmer; };
+	unsigned int getMask() const { return mask; };
+
+	const set<int> & lookup(const string & seq, int pos) const {
+		int v=convert(seq,pos);
+		Hash::const_iterator it=hash.find(v);
+		if (it==hash.end()) return emptySet; else return it->second;
+	}
+
+	inline const set<int> & lookup(unsigned int key) const
+	{
+		Hash::const_iterator it=hash.find(key);
+		if (it==hash.end()) return emptySet; else return it->second;
+
+	}
+	inline unsigned int convert(const string & seq, int pos) const
+	{
+		if (pos+kmer>seq.size()) throw string("HapHash string too short");
+		int v=0;
+		for (int x=pos, y=0;x<int(pos+kmer);x++,y++) {
+			v |= (map_char(seq[x]) << (2*y) );
+		}
+		return v;
+	}
+	inline unsigned int pushBack(const unsigned int & key, const char & c) const
+	{
+		return (key >> 2) | (map_char(c) << (2*(kmer-1)));
+	}
+	void print() {
+		for (Hash::const_iterator it=hash.begin();it!=hash.end();it++) {
+			cout << "hash[" << it->first << "]: ";
+			BOOST_FOREACH(int i, it->second) {
+				cout << " " << i;
+			}
+			cout << endl;
+
+		}
+
+
+	}
+	inline int map_char(const char & c) const
+	{
+		// TODO do something with N's in reads?
+		if (c=='A') return 0; else if (c=='C') return 1; else if (c=='G') return 2; else if (c=='T') return 3; else return 0; //throw string("Haplotype/Read in hash has N's");
+	}
+	typedef hash_map<unsigned int, set<int> > Hash;
+
+protected:
+	unsigned int kmer;
+	unsigned int mask;
+	const Haplotype *hap_ptr;
+	Hash hash;
+	set<int> emptySet;
+
+
+
+	void makeHash(const Haplotype & hap)
+	{
+		for (int x=0;x<int(hap.size())-int(kmer);x++) hash[convert(hap.seq,x)].insert(x);
+	}
+};
+
+
+
+#endif /*HAPLOTYPE_HPP_*/
+
diff --git a/HaplotypeDistribution.cpp b/HaplotypeDistribution.cpp
new file mode 100644
index 0000000..910252b
--- /dev/null
+++ b/HaplotypeDistribution.cpp
@@ -0,0 +1,486 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <stdint.h>
+#include <string>
+#include <assert.h>
+#include <iostream>
+#include <sstream>
+#include <stdio.h>
+#include <stdlib.h>
+#include "HaplotypeDistribution.hpp"
+
+using namespace std;
+
+
+
+
+HaplotypeDistribution::HaplotypeDistribution(uint32_t _midPos, const string & refSeq, uint32_t refSeqStart)
+{
+	pos0=0;
+	pos1=0;
+	midPos=_midPos;
+
+	// add reference sequence
+	uint32_t bs = 4;
+	uint32_t rm = refSeq.size() % bs;
+	int add = 1; if (rm==0) add=0;
+
+	for (size_t x=0;x<(refSeq.size()/bs)+add;x++) {
+		uint32_t start = refSeqStart+x*bs;
+		Haplotype refHap(Haplotype::Ref, refSeq.substr(x*bs,bs));
+		insertSeq(refHap, start);
+	}
+}
+
+
+ostream &operator<<(ostream &stream, const HaplotypeDistribution &hb)
+{
+	size_t cnt=0;
+
+	for (size_t x=0;x<hb.hapBlocks.size();x++) if (hb.hapBlocks[x]!=NULL){
+		stream << "HAPLOTYPE BLOCK " << cnt++ << endl;
+		stream << *hb.hapBlocks[x] << endl;
+	}
+
+	cnt=0;
+
+	for (map<int, HapBlock *>::const_iterator it=hb.insertions.begin();it!=hb.insertions.end();it++) {
+		stream << "INSERTION " << cnt++ << endl;
+		stream << *it->second << endl;
+	}
+	return stream;
+}
+
+
+
+int HaplotypeDistribution::fetchFuncInsertRead(const bam1_t *b, void *data)
+{
+	( (HaplotypeDistribution*) data)->insertRead(b);
+	return 0;
+}
+
+void HaplotypeDistribution::insertRead(const bam1_t* b)
+{
+	 if ((b->core.flag & BAM_FMUNMAP) != 0) return;
+	/*
+	for each cigar operation in read {
+
+		get sequence corresponding to cigar n
+		calc starting position in reference
+		calc confidence of seq (product of mapping quality and base qualities)
+		make haplotype-> seq
+		insert_seq(seq, reference_pos)
+	*/
+
+	const bam1_core_t *c=&b->core;
+	uint32_t *cigar=bam1_cigar(b);
+	uint32_t k, l=0;
+	uint32_t refPos = c->pos;
+	int lastop=-1;
+	uint32_t lastPos=refPos;
+	//cout << "read: " << bam1_qname(b) << endl;
+	for (k = 0; k < c->n_cigar; ++k) {
+		int op = cigar[k] & BAM_CIGAR_MASK;
+		int32_t len=cigar[k] >> BAM_CIGAR_SHIFT;
+	//	cout << "cigar" << k << endl;
+
+		Haplotype seq; seq.reserve(len);
+		seq.conf=(double) c->qual; // this scales the confidence from the individual base calls with the mapping confidence
+
+		if (op==BAM_CINS || op==BAM_CMATCH || op==BAM_CSOFT_CLIP) {
+			for(int32_t x=0;x<len;x++) {
+				seq+=( bam_nt16_rev_table[ bam1_seqi(bam1_seq(b), l) ] );
+				seq.conf+=(double) bam1_qual(b)[l]; // base quality is on log10 scale
+				l++;
+			}
+		} else if (op==BAM_CDEL) {
+			seq.insert(0, len, '#');
+	//		cout << "INSDELETION" << endl;
+		}
+
+
+		if (op==BAM_CINS) seq.type=Haplotype::In; else if (op==BAM_CDEL) seq.type=Haplotype::Del; else seq.type=Haplotype::Normal;
+		//cout << endl << " *** " << endl;
+		//cout << "op: " << op  << " len: " << len << endl;
+		//cout << "CIGARseq: " << seq << endl;
+		//cout << "refPos " << refPos << endl;
+
+		// now add it to the haplotype structure for this location
+		if (seq.size()) {
+			if (1) { //refPos!=midPos) {
+				//cout << bam1_qname(b) << " refpos: " << refPos << " seq: " << seq << endl;
+				if (seq[0]=='#') {
+					// deletion, recode
+
+					if (seq.size()>30) {
+						cerr << string("Deletion is too long...") << endl;
+						len = 30;
+
+					}
+					seq.seq.clear();
+					seq.seq+=(char(int('#')+len));
+				}
+				insertSeq(seq, refPos);
+			}
+			else {
+				if (op==BAM_CINS || op==BAM_CDEL) {
+					indelsAtMidPos.insert(seq);
+				} else insertSeq(seq, refPos);
+			}
+		}
+
+		// check if previous seq was not an insert
+		if (lastop!=-1 && lastop!=BAM_CINS) {
+			if (lastPos==refPos && lastop!=BAM_CSOFT_CLIP && lastop!=BAM_CHARD_CLIP) throw string("Not allowed.");
+			for (uint32_t pos=lastPos;pos<refPos;pos++) {
+				map<int, HapBlock*>::iterator it=insertions.find(pos);
+				if (it!=insertions.end()) {
+					(it->second)->insert(Haplotype(Haplotype::In));
+				}
+			}
+		}
+
+		// update position for the next cigar
+		lastPos=refPos;
+		if (op == BAM_CMATCH || op == BAM_CDEL || op==BAM_CREF_SKIP) {
+			refPos+=(uint32_t) len;
+		} else if (op!=BAM_CINS && op != BAM_CSOFT_CLIP && op != BAM_CHARD_CLIP) throw string("I don't know how to smoke this CIGAR");
+		lastop=op;
+	}
+
+
+}
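+
+/* A usage sketch (illustration only, not taken from the dindel sources): fetchFuncInsertRead()
+   is written as a bam_fetch_f callback, so it is meant to be driven by bam_fetch() from the
+   samtools-0.1.x API in bam.h, with the HaplotypeDistribution passed through the void* data
+   argument.  The MyBam wrapper, file name, region coordinates and getRefSeqForWindow() helper
+   below are assumptions made for this example.
+
+	uint32_t windowStart = 9900, midPos = 10000, windowEnd = 10100;
+	string refSeq = getRefSeqForWindow();              // hypothetical: reference bases of the window
+	MyBam bam("sample.bam");                           // see MyBam.hpp
+	HaplotypeDistribution hd(midPos, refSeq, windowStart);
+	bam_fetch(bam.bf, bam.idx, bam.getTID("chr1"), (int) windowStart, (int) windowEnd,
+	          &hd, HaplotypeDistribution::fetchFuncInsertRead);
+	hd.setFrequencies();                               // compute per-block haplotype frequencies
+*/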
+
+vector<Variant> HaplotypeDistribution::getIndelVariantsAtMidPos()
+{
+	vector<Variant> variants;
+	BOOST_FOREACH(Haplotype hap, indelsAtMidPos) {
+		if (hap.type==Haplotype::In) {
+			variants.push_back(Variant(string("+").append(hap.seq)));
+		} else if (hap.type==Haplotype::Del) {
+			variants.push_back(Variant(string("-").append(string(hap.seq.size(),'R'))));
+		} else throw string("Unrecognized variant");
+	}
+	return variants;
+}
+
+void HaplotypeDistribution::setFrequencies()
+{
+	for (size_t x=0;x<hapBlocks.size();x++) {
+		HapBlock * hb=hapBlocks[x];
+		if (hb!=NULL) {
+			hb->setFrequencies();
+		}
+	}
+	// insertions
+
+	for (map<int, HapBlock*>::iterator it=insertions.begin();it!=insertions.end();it++) {
+		HapBlock * hb=it->second;
+		if (hb!=NULL) {
+			hb->setFrequencies();
+		}
+	}
+
+}
+
+void HaplotypeDistribution::updateBlock(HapBlock *hb, const Haplotype & seq, uint32_t seqStart)
+{
+	if (seq.size()!=hb->length() || seqStart!=hb->start()) throw string("updateBlock-seq mismatch.");
+	hb->insert(seq);
+}
+
+
+
+
+
+bool sortFunc(const HapBlock *a, const HapBlock *b)
+{
+	if (a==NULL && b!=NULL) return false;
+	else if (a!=NULL && b==NULL) return true;
+	else if (a==NULL && b==NULL) return false;
+	else {
+		if (a->start()<b->start()) return true; else return false;
+	}
+}
+
+void HaplotypeDistribution::check()
+{
+	sort(hapBlocks.begin(), hapBlocks.end(), sortFunc);
+	vector<HapBlock*>::iterator it=find(hapBlocks.begin(), hapBlocks.end(),(HapBlock *) NULL );
+	for (vector<HapBlock*>::iterator it2=it;it2!=hapBlocks.end();it2++) {
+		if (*it2!=NULL) throw string("Error: NULLs not consecutive");
+	}
+	for (size_t x=0;x<hapBlocks.size();x++) {
+		if (hapBlocks[x]->end()<hapBlocks[x]->start()) {
+			cout << "CHECK SMALLER HD:" << endl;
+			cout << *this << endl;
+
+			throw string("Block end is before block start!");
+		}
+	}
+	for (size_t x=1;x<hapBlocks.size();x++) {
+		if (hapBlocks[x-1]->end()+1!=hapBlocks[x]->start()) {
+			cout << "CHECK CONSECUTIVE HD:" << endl;
+			cout << *this << endl;
+
+			throw string("Blocks are not consecutive!");
+		}
+	}
+
+	for (size_t x=1;x<hapBlocks.size();x++) {
+		if (hapBlocks[x-1]->end()>=hapBlocks[x]->start()) {
+			cout << "CHECK HD:" << endl;
+			cout << *this << endl;
+
+			throw string("Blocks are overlapping!");
+		}
+	}
+}
+
+
+void HaplotypeDistribution::newBlock(HapBlock *hb)
+{
+	//checkBlock(hb);
+	vector<HapBlock*>::iterator it=find(hapBlocks.begin(), hapBlocks.end(),(HapBlock *) NULL );
+	if (it==hapBlocks.end()) {
+		hapBlocks.push_back(hb);
+	} else {
+		*it=hb;
+	}
+	if (hb->start()<pos0) pos0=hb->start();
+	if (hb->end()>pos1) pos1=hb->end();
+
+	sort(hapBlocks.begin(), hapBlocks.end(), sortFunc);
+}
+
+
+/*
+void HaplotypeDistribution::invalidateBlock(HapBlock *hb)
+{
+	for (uint32_t p=hb->start();p<=hb->end();p++) {
+		posToBlock[p-pos0]=-1;
+	}
+}
+*/
+void HaplotypeDistribution::deleteBlock(int idx)
+{
+	delete hapBlocks[idx];
+	hapBlocks[idx]=NULL;
+	if (hapBlocks.size()>1 && idx!=(int)hapBlocks.size()-1) {
+			hapBlocks[idx]=hapBlocks.back();
+			hapBlocks.back()=NULL;
+	}
+}
+
+
+void HaplotypeDistribution::splitBlock(int idx, const Haplotype & seq, uint32_t seqStart)
+{
+	// block **********
+	// seq	    ***
+
+	if (seq.size()==0) throw string("Empty haplotype!");
+
+	uint32_t seqEnd=seqStart+seq.size()-1;
+	HapBlock & block=*hapBlocks[idx];
+	if (seqStart<block.start()||seqStart+seq.size()-1>block.end()) throw string("seq outside of block boundaries");
+
+	uint32_t lenA=seqStart-block.start();
+	uint32_t lenB=seq.size();
+	if (lenB==0) throw string("Empty sequence!");
+	uint32_t lenC=(block.end()==seqStart+seq.size()-1) ? 0 : block.end()-seqEnd;
+	// cout << "block.start: " << block.start() << " block.end " << block.end() << " seqStart: " << seqStart << " seqEnd: " << seqStart+seq.size()-1 << " lenA: " << lenA << " lenB: " << lenB << " lenC: " << lenC << endl;
+	if (1) { //!block.hasHaplotype(seq, seqStart)) {
+		// split them
+		// note that blocks are not overlapping, so we can append them to the vector
+		HapBlock *hbA, *hbB, *hbC;
+		if (lenA) hbA=new HapBlock(block, block.start(), lenA);
+		if (lenB) hbB=new HapBlock(block, block.start()+lenA, lenB);
+
+
+
+		if (lenC) hbC=new HapBlock(block, hbB->end()+1, lenC);
+
+		deleteBlock(idx);
+
+		if (lenB) newBlock(hbB);
+		updateBlock(hbB, seq, seqStart);
+
+		//if (lenA) cout << "hbA: " << *hbA << endl;
+		//if (lenB) cout << "hbB: " << *hbB << endl;
+		//if (lenC) cout << "hbC: " << *hbC << endl;
+
+		if (lenA) newBlock(hbA);
+		if (lenC) newBlock(hbC);
+	}
+
+
+
+}
+
+int HaplotypeDistribution::getFirstOverlappingBlock(uint32_t seqStart, uint32_t seqEnd) const
+{
+	size_t x=0;
+	while (x<hapBlocks.size()&&hapBlocks[x]!=NULL) {
+		const HapBlock & hb=*hapBlocks[x];
+		if ( (hb.end()>=seqStart) && hb.start()<=seqEnd ) return int(x);
+		x++;
+	}
+	return -1;
+}
+
+void HaplotypeDistribution::insertSeq(Haplotype & seq, uint32_t seqStart)
+{
+	// this->check();
+	if (seq.type==Haplotype::Normal || seq.type==Haplotype::Ref || seq.type==Haplotype::Del) {
+		//cout << "insertSeq. seq: " << seq << " seqStart: " << seqStart << endl;
+		uint32_t seqEnd = seqStart+(uint32_t) seq.size()-1;
+		int idx=getFirstOverlappingBlock(seqStart, seqEnd);
+		if (idx!=-1) {
+			// cout << "found idx: " << idx << endl;
+			HapBlock & block=*hapBlocks[idx];
+			if (block.start()<seqStart) {
+				assert(block.end()>=seqStart);
+				if (seqEnd>block.end()) {
+	//				cout << "typeA" << endl;
+					uint32_t overlap=block.end()-seqStart+1;
+					// block **********
+					// seq          ********
+					Haplotype seqA(seq, 0, overlap);
+					splitBlock(idx, seqA, seqStart);
+					Haplotype seqB(seq, overlap, seq.size());
+					insertSeq(seqB, seqStart+overlap);
+				} else {
+	//				cout << "typeB" << endl;
+					// block ***************
+					// seq       *****
+					splitBlock(idx, seq, seqStart);
+				}
+			} else
+			{
+				// block.start() >=seqStart
+				if (block.end()>seqEnd) {
+					// block.start() >=seqStart && block.end()>seqEnd
+	//				cout << "typeC" << endl;
+					// block      ***********
+					// seq    *******
+					uint32_t overlap=seqEnd-block.start()+1;
+					assert(overlap>0 && overlap<=seq.size());
+
+					Haplotype seqB(seq, seq.size()-overlap, overlap);
+
+					splitBlock(idx, seqB, block.start());
+	//				cout << "seqB: " << seqB << endl;
+
+					if (overlap<seq.size()) {
+						Haplotype seqA(seq, 0, seq.size()-overlap);
+						// cout << "seqA: " << seqA << endl;
+						newBlock(new HapBlock(seqA, seqStart));
+					}
+
+					// note that newBlock invalidates idx!
+
+				} else
+				{
+					// block.start() >=seqStart && block.end()<=seqEnd
+	//				cout << "typeD" << endl;
+					// block      ********* ***
+					// seq      ******************
+					uint32_t lenA=block.start()-seqStart;
+					uint32_t lenB=block.end()-block.start()+1;
+					uint32_t lenC=seqEnd-block.end();
+					if (lenA) {
+						Haplotype seqA(seq, 0, lenA);
+						newBlock(new HapBlock(seqA, seqStart));
+					}
+
+					assert(lenB>0);
+					Haplotype seqB(seq, lenA, lenB);
+					updateBlock(&block, seqB, seqStart+lenA);
+
+					if (lenC) {
+						Haplotype seqC(seq, lenA+lenB, lenC);
+						insertSeq(seqC, seqStart+lenA+lenB);
+					}
+				}
+			}
+
+		} else {
+			// cout << "newBLock" << endl;
+			newBlock(new HapBlock(seq, seqStart));
+		}
+	}
+	else if (seq.type==Haplotype::In) {
+		map<int, HapBlock *>::iterator it=insertions.find(seqStart);
+		if (it==insertions.end()) {
+			insertions[seqStart]=new HapBlock(seq, seqStart);
+			insertions[seqStart]->setType(HapBlock::INSERT);
+			insertions[seqStart]->insert(Haplotype(Haplotype::Ref));//, string(seq.size(),'0')));
+		} else
+		{
+			it->second->insert(seq);
+		}
+	}
+	else throw string("Cannot handle this case.");
+}
+
+size_t HaplotypeDistribution::getNumberOfHaplotypes(uint32_t start, uint32_t end) const
+{
+	size_t n=1;
+	size_t x=0;
+	while (x<hapBlocks.size() && hapBlocks[x]!=NULL) {
+		const HapBlock & hb=*hapBlocks[x];
+		if (hb.end()>=start && hb.start()<=end) {
+			n*=hapBlocks[x]->size();
+		}
+		x++;
+	}
+	for (map<int, HapBlock*>::const_iterator it=insertions.begin();it!=insertions.end();it++) {
+		if (it->first>=(int) start && it->first<=(int)end) {
+			n*=(it->second->size()); // we also consider haplotypes without the insertion
+		}
+	}
+	return n;
+}
+
+size_t HaplotypeDistribution::getNumberOfHaplotypes(uint32_t start, uint32_t end, double minFreq) const
+{
+	size_t n=1;
+	size_t x=0;
+	while (x<hapBlocks.size() && hapBlocks[x]!=NULL) {
+		const HapBlock & hb=*hapBlocks[x];
+		if (hb.end()>=start && hb.start()<=end) {
+			size_t nh=0;
+			for (map<Haplotype, int>::const_iterator hit=hb.haplotypes.begin();hit!=hb.haplotypes.end();hit++) {
+				const Haplotype & h=hit->first;
+				if (h.type==Haplotype::In || h.type==Haplotype::Del) nh++;
+					else { if (h.freq>minFreq) nh++; }
+			}
+			n*=nh;
+		}
+		x++;
+	}
+	for (map<int, HapBlock*>::const_iterator it=insertions.begin();it!=insertions.end();it++) {
+		if (it->first>=(int) start && it->first<=(int)end) {
+			n*=(it->second->size()); // we also consider haplotypes without the insertion
+		}
+	}
+	return n;
+}
+
+HaplotypeDistribution::~HaplotypeDistribution()
+{
+	for (size_t x=0;x<hapBlocks.size();x++) if (hapBlocks[x]!=NULL) delete hapBlocks[x];
+	for (map<int,HapBlock*>::iterator it=insertions.begin();it!=insertions.end();it++) delete it->second;
+}
diff --git a/HaplotypeDistribution.hpp b/HaplotypeDistribution.hpp
new file mode 100644
index 0000000..9432c0c
--- /dev/null
+++ b/HaplotypeDistribution.hpp
@@ -0,0 +1,498 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef HAPLOTYPEDISTRIBUTION_HPP_
+#define HAPLOTYPEDISTRIBUTION_HPP_
+#include <string>
+#include <assert.h>
+#include <iostream>
+#include <stdint.h>
+#include <vector>
+#include <list>
+#include <set>
+#include <string>
+#include <map>
+#include <cmath>
+#include "bam.h"
+#include "Haplotype.hpp"
+#include "HapBlock.hpp"
+#include "foreach.hpp"
+#include "VariantFile.hpp"
+using namespace std;
+
+
+
+
+class HaplotypeDistribution
+{
+friend class HDIterator2;
+// methods
+public:
+	HaplotypeDistribution(uint32_t _midPos, const string & refSeq, uint32_t refSeqStart);
+
+	void insertRead(const bam1_t *b);
+	static int fetchFuncInsertRead(const bam1_t *b, void *data);
+	pair<vector<Haplotype>, vector<double> > enumerateHaplotypes(double th);
+	friend ostream &operator<<(ostream &stream, const HaplotypeDistribution &hb);
+	void insertSeq(Haplotype & seq, uint32_t seqStart);
+	void check();
+	size_t getNumberOfHaplotypes(uint32_t start, uint32_t end) const;
+	size_t getNumberOfHaplotypes(uint32_t start, uint32_t end, double minFreq) const;
+	void setFrequencies();
+	~HaplotypeDistribution();
+	set<Haplotype> getIndelsAtMidPos() const { return indelsAtMidPos; };
+	vector<Variant> getIndelVariantsAtMidPos();
+protected:
+	void updateBlock(HapBlock *hb, const Haplotype & seq, uint32_t seqStart);
+	void newBlock(HapBlock *hb);
+	void deleteBlock(int idx);
+	void splitBlock(int idx, const Haplotype & seq, uint32_t seqStart);
+	int getFirstOverlappingBlock(uint32_t seqStart, uint32_t seqEnd) const;
+
+	uint32_t len;
+	uint32_t pos0, pos1, midPos;
+	vector<HapBlock*> hapBlocks;
+	map<int, HapBlock*> insertions;
+
+	set<Haplotype> indelsAtMidPos;
+
+};
+
+
+class HDHapBlock
+{
+public:
+	HDHapBlock() { };
+	vector<Haplotype> haps;
+	uint32_t start, end;
+	int type;
+};
+
+class HDIterator2
+{
+public:
+	HDIterator2(const HaplotypeDistribution &hd, size_t maxHap, uint32_t pos, uint32_t left, uint32_t right, int _noIndelWindow=-1)
+	{
+		// noIndelWindow ignores indels around pos
+		noIndelWindow=_noIndelWindow;
+		// variants will be added at position pos
+
+		hdPtr=&hd;
+		midPos=pos;
+		setupBlocks(hd, pos, left, right);
+		setThresholds(maxHap);
+		init();
+	};
+
+	void init()
+	{
+		for (size_t x=0;x<iter.size();x++) {
+			iter[x]=0;
+		}
+		hap.seq.clear();
+		_last=false;
+	};
+	void operator++()
+	{
+		size_t x;
+		for (x=0;x<iter.size() && (++iter[x])==max[x];++x) {
+			iter[x]=0;
+			if (x==iter.size()-1) _last=true;
+		}
+	};
+	bool last() const { return _last; };
+	uint32_t start() const { return (*hapBlocks.begin())->start(); };
+	uint32_t end() const { return (*hapBlocks.rbegin())->end(); };
+	Haplotype getMaxFreqHap() const
+	{
+		Haplotype maxh;
+		maxh.seq.clear();
+		maxh.freq=1.0;
+		maxh.nfreq=1.0;
+		for (size_t x=0;x<hbs.size();x++) {
+			double mf=0.0;
+			size_t idx;
+			for (size_t y=0;y<hbs[x].haps.size();y++) if (hbs[x].haps[y].freq>mf) { idx=y; mf=hbs[x].haps[y].freq; };
+			maxh.freq*=mf;
+			if (!hasIndel[x]) maxh.nfreq*=mf;
+			maxh.append(hbs[x].haps[idx].seq);
+		}
+		return maxh;
+	}
+
+	operator Haplotype()
+	{
+		throw string("REIMPLEMENT");
+		hap.seq.clear();
+		hap.freq=1.0;
+		hap.nfreq=1.0;
+		hap.type=Haplotype::Normal;
+		//hap.haps.clear();
+		for (size_t x=0;x<iter.size();x++) {
+			const Haplotype & h=hbs[x].haps[iter[x]];
+			// do not append deletions as they are coded by '#'
+			if (h.type==Haplotype::In || h.type==Haplotype::Normal) {
+				hap.append(h.seq);
+			}
+			if (h.seq.size()>0) {
+				hap.type|=h.type;
+			}
+
+			hap.freq*=h.freq;
+			if (hasIndel[x]==0) {
+				hap.nfreq*=h.freq;
+			}
+			//hap.haps.push_back(pair<string, double>(h.seq, h.freq));
+		}
+		return hap;
+	}
+
+
+	double getLogNumHaps() const { return logNumHap; };
+	friend ostream &operator<<(ostream &stream, const HDIterator2 & hdi)
+	{
+		vector<HapBlock *> hb; hb.reserve(hdi.hapBlocks.size());
+		for (list<HapBlock*>::const_iterator lit=hdi.hapBlocks.begin();lit!=hdi.hapBlocks.end();lit++) hb.push_back(*lit);
+		HapBlock::showVector(stream, hb, hdi.midPos);
+		return stream;
+	}
+
+	void generateHapsWithAlignedVariants(vector<Haplotype> & haps, const AlignedCandidates & variants, int print=0, bool changeINStoN=false)
+	{
+		haps.clear();
+		if (print) {
+			cout << "Variants: ";
+			BOOST_FOREACH(Variant var, variants.variants) {
+				cout << "[" << var.size() << " " << var.getSeq() << "]";
+			}
+			cout << endl;
+		}
+
+		//map <Haplotype, Haplotype> pRef, pInd;
+		set <Haplotype> setHap;
+		vector <Haplotype> vecHap;
+		vector <vector<int> > vecRefPos;
+
+		size_t minLen=100000;
+		init();
+		while (!last()) {
+			hap.seq.clear();
+			hap.freq=1.0;
+			hap.nfreq=1.0;
+			hap.type=Haplotype::Normal;
+			//hap.haps.clear();
+
+			vector<int> refPos;
+			for (size_t x=0;x<iter.size();x++) {
+				const Haplotype & h=hbs[x].haps[iter[x]];
+				int len = hbs[x].end-hbs[x].start+1;
+				if (hbs[x].type == HapBlock::NORMAL) {
+					int p = hbs[x].start;
+					bool hasDel = false;
+					for (size_t y=0;y<h.seq.size();y++) {
+						int c=int(h.seq[y]);
+						if (c>=35 && c<65) { hasDel = true; }
+						refPos.push_back(p);
+						p++;
+					}
+					if (hasDel == false && int(h.seq.size())!=len) throw string("Haplotype block length mismatch.");
+				} else if (hbs[x].type == HapBlock::INSERT) {
+					for (size_t y=0;y<h.seq.size();y++) {
+						refPos.push_back(-1);
+					}
+				}
+				hap.append(h.seq);
+				hap.freq*=h.freq;
+			}
+
+			// apply the deletions: insertRead() encodes a deletion of length len as the single character char('#'+len); decode it here and erase the deleted bases and their reference positions
+
+			size_t y=0;
+			while (y<hap.size()) {
+				int c=int(hap[y]);
+				if (c>=35 && c<65) {
+					int len=c-int('#');
+					if (len>int(hap.size()-y)) len=hap.size()-y;
+					hap.seq.erase(y,len);
+					refPos.erase(refPos.begin()+y,refPos.begin()+y+len);
+				} else y++;
+			}
+			vecHap.push_back(hap);
+			vecRefPos.push_back(refPos);
+			++(*this);
+		}
+
+		// first add variants combinatorially, then add variants to the set of combinatorially generated haplotypes
+
+		for (int ac = 1;ac>=0;ac--) {
+			size_t numHap = vecHap.size();
+
+			bool addComb = false;
+			if (ac==1) {
+				addComb = true;
+			} else numHap = vecHap.size();
+			BOOST_FOREACH(AlignedVariant var, variants.variants) {
+				if (addComb) {
+					numHap = vecHap.size();
+				}
+
+				if (var.getAddComb()==addComb) {
+					for (size_t h=0;h<numHap;h++) {
+						Haplotype _hap=vecHap[h];
+						bool changed=false;
+
+						//cout << "******************************" << endl;
+						//cout << "var: " << var.getStartHap() << " " << var.getString() << endl;
+						//cout << " hap: " << vecHap[h].seq << endl;
+
+						vector<int> refPos = vecRefPos[h];
+						vector<int>::iterator it = find(refPos.begin(), refPos.end(), var.getStartHap());
+						if (it!=refPos.end()) {
+
+							int idx = distance(refPos.begin(), it);
+							if (var.getType()==Variant::DEL) {
+								// deletion
+								_hap.seq.erase(idx, var.size());
+								refPos.erase(refPos.begin()+idx, refPos.begin()+idx+var.size());
+								changed=true;
+							} else if (var.getType()==Variant::INS) {
+								// insertion
+								if (changeINStoN) {
+									_hap.seq.insert(idx, string(var.getSeq().size(), 'N'));
+								} else {
+									_hap.seq.insert(idx, var.getSeq());
+								}
+								refPos.insert(refPos.begin()+idx, (size_t) var.size(), -1);
+
+								changed=true;
+							} else if (var.getType()==Variant::SNP) {
+								// snp
+								const string & seq=var.getSeq();
+								char nuc=seq[3];
+								if (_hap.seq[idx]!=seq[3]) {
+									_hap.seq[idx]=nuc;
+									changed=true;
+								}
+							}
+							if (changed) {
+							//	cout << "_hap: " << _hap.seq << endl;
+								vecHap.push_back(_hap);
+								vecRefPos.push_back(refPos);
+							}
+						}
+					}
+				}
+			}
+		}
+		for (size_t x=0;x<vecHap.size();x++) if (vecHap[x].size()<minLen) minLen=vecHap[x].size();
+
+		BOOST_FOREACH(Haplotype hap, vecHap) {
+			//setHap.insert(Haplotype(hap,0, minLen));
+			setHap.insert(hap);
+		}
+
+		BOOST_FOREACH(Haplotype hap, setHap) {
+			haps.push_back(hap);
+		}
+
+	}
+
+protected:
+	void setupBlocks(const HaplotypeDistribution &hd, uint32_t pos, uint32_t left, uint32_t right)
+	{
+		//cout << "_minFreq: " << _minFreq << endl;
+		for (size_t x=0;x<hd.hapBlocks.size();x++) if (hd.hapBlocks[x]!=NULL) {
+			if (x) {
+				if (hd.hapBlocks[x-1]->end()>hd.hapBlocks[x]->start()) {
+					cout << hd.hapBlocks[x-1]->end() << " " << hd.hapBlocks[x]->start() << endl;
+					cout << "HD: " << endl << hd << endl;	
+					throw string("Blocks are overlapping.");
+				}
+			}
+			if (hd.hapBlocks[x]->start()>=left && hd.hapBlocks[x]->end()<=right) {
+				if (hd.hapBlocks[x-1]->end()+1!=hd.hapBlocks[x]->start()) {
+					cout << "NOT CONSECUTIVE" << endl;
+					cout << hd.hapBlocks[x-1]->end() << " " << hd.hapBlocks[x]->start() << endl;
+					cout << "HD: " << endl << hd << endl;	
+	
+					throw string("Blocks are not consecutive.");
+
+				}
+
+				hapBlocks.push_back(hd.hapBlocks[x]);
+				//cout << *hd.hapBlocks[x] << endl;
+			}
+		}
+
+		// insertions
+
+		list<HapBlock*>::iterator lit=hapBlocks.begin();
+		for (map<int, HapBlock*>::const_iterator it=hd.insertions.begin();it!=hd.insertions.end();it++) {
+			if (it->second->start()>=left) {
+				for (list<HapBlock*>::iterator lit2=lit;lit2!=hapBlocks.end();lit2++) {
+					if (int((*lit2)->start())>=it->first)  {
+						hapBlocks.insert(lit2, it->second);
+						lit=lit2;
+						break;
+					}
+				}
+			}
+		}
+
+		// copy
+
+		bool found=false;
+		hbs.resize(hapBlocks.size());
+		hasIndel.resize(hapBlocks.size());
+		int x=0;
+		for (lit=hapBlocks.begin();lit!=hapBlocks.end();lit++,x++) {
+			uint32_t bs=(*lit)->start();
+			uint32_t be=(*lit)->end();
+			if (pos>=bs && pos<=be) {
+				indelIdx=x;
+				indelOffs=pos-bs;
+				found=true;
+			}
+			hasIndel[x]=0;
+			for (map<Haplotype,int>::const_iterator it=(*lit)->haplotypes.begin();it!=(*lit)->haplotypes.end();it++) {
+				hbs[x].haps.push_back(it->first);
+			}
+			hbs[x].start=bs;
+			hbs[x].end=be;
+			hbs[x].type=(*lit)->getType();
+			if (hbs[x].type==HapBlock::INSERT) hbs[x].end=hbs[x].start-1;
+			// set frequency of reference haplotype in block
+			// this makes sure that the reference haplotype is always included
+			bool reffound=false;
+			for (size_t y=0;y<hbs[x].haps.size();y++) {
+				if (hbs[x].haps[y].type==Haplotype::Ref) {
+					reffound=true;
+					//hbs[x][y].freq=1.0;
+				} else {
+					//for (size_t z=0;z<hbs[x].haps[y].seq.size();z++) hbs[x].haps[y].seq[z] = tolower(hbs[x].haps[y].seq[z]);
+				}
+			}
+			if (!reffound) {
+				cout << **lit << endl;
+
+			}
+			assert(reffound==true);
+		}
+
+		if (hbs.size() == 0) {
+			throw string("Not enough blocks.");
+		}
+
+		//if (!found) throw string("Cannot find position of indel in haplotypedistribution.");
+	//	cout << "maxFreqHap: " << getMaxFreqHap() << endl;
+
+
+	};
+
+	void setThresholds(size_t maxHap)
+	{
+		// hasIndel is currently set to zero for all blocks, because HaplotypeDistribution
+		// does not include indels at midPos
+		// get lowest frequency
+		vector<double> minFreq(hbs.size(),0.0);
+		vector<int> elim(hbs.size(),1);
+		size_t x=0;
+
+		typedef vector<Haplotype>::iterator LHIt;
+		LHIt it;
+
+		double logMinHap=0.0;
+		double logNH=0.0;
+
+		for (x=0;x<hbs.size();x++) {
+			logNH+=log(double(hbs[x].haps.size()));
+		}
+
+
+
+
+		double logMH=log(double(maxHap));
+		if (logMH<logMinHap) logMH=logMinHap;
+
+
+		// keep removing haplotypes until we have the desired number of haplotypes
+		bool erased=true;
+		while (logNH>logMH && erased) {
+			erased=false;
+			for (x=0;x<hbs.size();x++) {
+				double mf=2.0;
+				for (it=hbs[x].haps.begin();it!=hbs[x].haps.end();it++) {
+					if (it->type!=Haplotype::Ref && it->freq<mf) mf=it->freq;
+				}
+				if (hbs[x].haps.size()<=1) { minFreq[x]=2.0; elim[x]=0; } else minFreq[x]=mf;
+			}
+
+			vector<double>::iterator mel=min_element(minFreq.begin(), minFreq.end());
+			assert(mel!=minFreq.end());
+			size_t y=distance(minFreq.begin(),mel);
+
+			if (elim[y]==0) break;
+			// erase the element
+			for (it=hbs[y].haps.begin();it!=hbs[y].haps.end();it++) if (it->type!=Haplotype::Ref && it->freq<=*mel) {
+				hbs[y].haps.erase(it);
+				erased=true;
+				break;
+			}
+
+			logNH=0.0;
+
+			for (x=0;x<hbs.size();x++) {
+				logNH+=log(double(hbs[x].haps.size()));
+
+			}
+			//cout << "logNH: " << logNH << " logMH: " << logMH << endl;
+		}
+		max.resize(hbs.size(),0);
+		iter.resize(hbs.size(),0);
+		for (x=0;x<hbs.size();x++) max[x]=hbs[x].haps.size();
+
+		logNumHap=logNH;
+
+		// check if we still have the reference sequence in every block
+		for (size_t x=0;x<hbs.size();x++) {
+			// cout << "hbs[" << x << "]: " << hbs[x].haps.size() << endl;
+			bool reffound=false;
+			for (size_t y=0;y<hbs[x].haps.size();y++) {
+				if (hbs[x].haps[y].type==Haplotype::Ref) {
+					reffound=true;
+				}
+			}
+			if (!reffound) {
+				cout << "x: " << x << endl;
+
+			}
+			if (!reffound) { throw string("Cannot find reference sequence."); };
+		}
+	}
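+
+	/* Worked example of the pruning above (numbers are illustrative).  Suppose three blocks
+	   hold 4, 3 and 2 haplotypes, i.e. 4*3*2 = 24 combinations, and maxHap = 8.  Each pass
+	   finds, per block, the lowest frequency among the non-reference haplotypes (blocks with
+	   a single haplotype are exempt), erases the single globally lowest-frequency haplotype,
+	   and recomputes the product:
+	       4*3*2 = 24 > 8  -> drop one, e.g. from block 0 -> 3*3*2 = 18
+	       3*3*2 = 18 > 8  -> drop one, e.g. from block 1 -> 3*2*2 = 12
+	       3*2*2 = 12 > 8  -> drop one, e.g. from block 0 -> 2*2*2 = 8, stop.
+	   Reference haplotypes are never removed, which is what the final check verifies. */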
+
+	double logNumHap;
+	bool _last;
+	vector<int > iter, max;
+	vector<size_t> hasIndel;
+	list<HapBlock *> hapBlocks;
+	Haplotype hap;
+	vector<HDHapBlock > hbs;
+	int indelIdx, indelOffs, noIndelWindow;
+	uint32_t midPos;
+
+	typedef list<HapBlock*>::iterator HBIt;
+	const HaplotypeDistribution *hdPtr;
+};
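+
+/* A usage sketch (illustration only, not taken from the dindel sources): turning a
+   HaplotypeDistribution into a set of candidate haplotypes around a position of interest.
+   The window coordinates, the maxHap limit of 8 and the candidateVariants object (normally
+   produced by the candidate-variant machinery in VariantFile.hpp / GetCandidates.cpp) are
+   assumptions made for this example.
+
+	HDIterator2 hdi(hd, 8, midPos, windowStart, windowEnd);
+	vector<Haplotype> haps;
+	hdi.generateHapsWithAlignedVariants(haps, candidateVariants);
+	// haps now holds the block-combination haplotypes plus every candidate variant applied
+	// to them, with duplicates removed.
+*/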
+
+#endif
diff --git a/Library.hpp b/Library.hpp
new file mode 100644
index 0000000..d4c9731
--- /dev/null
+++ b/Library.hpp
@@ -0,0 +1,258 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Library.hpp
+ *
+ *  Created on: Aug 27, 2009
+ *      Author: caa
+ */
+#ifndef LIBRARY_HPP
+#define LIBRARY_HPP
+#include <stdlib.h>
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <string>
+#include <boost/tuple/tuple.hpp>
+#include <string>
+#include "bam.h"
+#include "Utils.hpp"
+#include "StringHash.hpp"
+
+using namespace std;
+
+class Library
+{
+public:
+	Library()
+	{
+	}
+	Library(int type)
+	{
+		if (type == 0) {
+			maxins = 2000;
+			vector<double> counts = vector<double>(maxins, 1.0);
+			calcProb(counts);
+		} else throw string("Library type not recognized.");
+	}
+	Library(const vector<double> & counts)
+	{
+		// read library from histogram file
+		if (counts.size() == 0) {
+			cout << "Warning: empty insert-size histogram passed to Library." << endl;
+		}
+		calcProb(counts);
+	}
+
+	int getMaxInsertSize() const { return maxins; };
+	int getModus() const { return modeInsertSize; };
+	double getProb(int x) const  {
+		if (x<0)  x = -x;
+		if (x>=maxins) x = maxins-1;
+		return probs[x];
+	}
+	double getNinetyFifthPctProb() const { return ninetyfifth_pct_prob; };
+	void print() const 
+	{
+		for (size_t x=0;x<probs.size();x++) {
+			cout << " " << probs[x];
+			if (log(probs[x])>0) {
+				cout << "LOG>0: " << x << endl;
+			}
+		}
+		cout << endl;
+	}
+
+protected:
+	int modeInsertSize;
+	void calcProb(const vector<double> & counts)
+	{
+		int max_isize = 2000;
+		int max_count = -1;
+		// get insertsize at maximum
+
+		//cout << "CALCPROB: " << endl;
+		for (int s=0;s<int(counts.size());s++) {
+			if (counts[s]>=max_count) {
+				max_count = counts[s];
+				max_isize = s;
+			}
+		}
+
+		
+		maxins = 25*max_isize;
+		if (maxins>int(counts.size())) { maxins=int(counts.size()); }
+		probs=vector<double>(maxins, 0.0);
+		double z=0.0, max=-1.0;
+		modeInsertSize=0;
+		for (int d=0;d<maxins;d++) {
+			probs[d] = counts[d];
+			if (probs[d]>max) {
+				max=probs[d];
+				modeInsertSize=d;
+			}
+			z+=probs[d];
+		}
+		for (int d=0;d<maxins;d++) {
+			probs[d] /=z;
+			if (probs[d]<1e-10) probs[d]=1e-10;
+		//	cout << "probs[" << d<<"]: " << probs[d] << endl;
+		}
+
+		sortProbs = probs;
+		std::sort(sortProbs.begin(), sortProbs.end());
+		double sum = 0.0;
+		for (int x=sortProbs.size()-1;x>0;x--) {
+			sum+=sortProbs[x];
+			if (sum>0.95) {
+				ninetyfifth_pct_prob = sortProbs[x];
+				break;
+			}
+		}
+		cout << "max: " << max/z << " ninetyfifth_pct_prob: " << ninetyfifth_pct_prob << endl;
+
+	}
+	vector<double> probs, sortProbs;
+	int maxins;
+	double ninetyfifth_pct_prob;
+};
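+
+/* A usage sketch (illustration only, not taken from the dindel sources): building a Library
+   from an insert-size histogram and querying it.  counts[i] is the number of read pairs
+   observed with insert size i; the toy numbers are assumptions made for this example.
+
+	vector<double> counts(500, 0.0);
+	counts[300] = 900.0;                  // histogram peaked at ~300 bp
+	counts[310] = 100.0;
+	Library lib(counts);                  // normalizes counts into probs[], floored at 1e-10
+	double p = lib.getProb(305);          // insert-size probability; negative/out-of-range sizes are clamped
+	int mode = lib.getModus();            // most frequent insert size (300 here)
+
+   For single-end data, Library(0) provides a flat distribution over insert sizes 0..1999.
+*/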
+
+class LibraryCollection : public string_hash<Library>
+{
+public:
+	LibraryCollection()
+	{
+		(*this)["single_end"]=Library(0);
+	}
+
+	void addFromFile(const string & fileName)
+	{
+		ifstream fin;
+		fin.open(fileName.c_str());
+		if (!fin.is_open()) throw string("Cannot open library file ").append(fileName);
+
+		int numLibs = 0;
+		int numLines = 0;
+
+		vector<double> counts;
+
+		string libName;
+
+		int prev=-1;
+		int max_count = -1; // highest count
+		int max_isize = -1; // insert size corresponding to highest count
+		while (!fin.eof()) {
+			string line;
+			getline(fin, line);
+			numLines++;
+			if (line.empty()) {
+				break;
+			}
+			istringstream is(line);
+			string isize_str, count_str;
+			int isize=-1;
+			double count=-1;
+			is >> isize_str;
+			if (isize_str == "#LIB") {
+	
+				if (counts.size()>0 && !libName.empty()) {
+					// next library!!!!!
+					cout << "Storing library: " << libName << endl;
+
+
+					LibraryCollection::const_iterator it = this->begin();
+					it = this->find(libName);
+					if (it != this->end()) {
+						cerr << "Error: libName: " << libName << endl;
+						cerr << "Duplicate library IDs." << endl;
+						throw string("Library error");
+					} else {
+						(*this)[libName]=Library(counts);
+						cout << "Number of inserts: " << counts.size() << endl;
+						numLibs++;
+					}
+					counts.clear();
+					prev=-1;
+				}
+
+				string label;
+				is >> label;
+				libName = label;
+				if (label.empty()) {
+					cerr << "Cannot read library name in line " << numLines << " of " << fileName << endl;
+					throw string("Cannot read library name ");
+				}
+
+
+				goto nextline;
+
+			} else {
+				if (!from_string<int>(isize, isize_str, std::dec)) { cerr << "Error reading from library file" << endl; }
+				is >> count_str; if (!from_string<double>(count, count_str, std::dec)) { cerr << "Error reading from library file" << endl; }
+
+				if (isize!=prev+1) {
+					cout << "isize: " << isize << " prev: " << prev << endl;
+					cerr << "Library insert sizes must be consecutive" << endl;
+					throw string("Library error.");
+				}
+				if (count<0) {
+					cerr << "Library insert size count is negative." << endl;
+					throw string("Library error.");
+				}
+
+				counts.push_back(count);
+				prev=isize;
+			}
+			nextline:
+			line.clear();
+		}
+
+		// store last library
+
+		LibraryCollection::const_iterator it = this->begin();
+		it = this->find(libName);
+		if (it != this->end()) {
+			cerr << "Duplicate library IDs." << endl;
+			throw string("Library error");
+		} else {
+			(*this)[libName]=Library(counts);
+			numLibs++;
+			cout << "Library " << libName << " loaded with " << counts.size() << " insert sizes." << endl;
+		}
+
+		if (numLibs==0) {
+			cerr << "Could not find any libraries. Are the headers specified correctly?" << endl;
+		}
+
+		fin.close();
+
+
+	}
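+
+	/* The expected file format, as inferred from the parser above: each library starts with a
+	   "#LIB <name>" header, followed by one "<insert_size> <count>" pair per line; insert sizes
+	   must start at 0 and be consecutive, and reading stops at the first empty line.  Example
+	   (values are illustrative):
+
+	       #LIB sampleA_lib1
+	       0 0
+	       1 0
+	       2 3
+	       3 17
+	       ...
+	       #LIB sampleA_lib2
+	       0 0
+	       ...
+	*/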
+
+	double getMaxInsertSize() const
+	{
+		double max = -HUGE_VAL;
+		for (LibraryCollection::const_iterator it = this->begin(); it!= this->end(); it++) {
+			if (it->second.getMaxInsertSize()>max) max = it->second.getMaxInsertSize();
+		}
+		return max;
+	}
+	//const bam_header_t *getBamHeader() const { return bamHeader; }
+protected:
+	//const bam_header_t *bamHeader;
+
+};
+
+#endif
diff --git a/MLAlignment.hpp b/MLAlignment.hpp
new file mode 100644
index 0000000..6618331
--- /dev/null
+++ b/MLAlignment.hpp
@@ -0,0 +1,78 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * MLAlignment.hpp
+ *
+ *  Created on: Apr 2, 2009
+ *      Author: caa
+ */
+
+#ifndef MLALIGNMENT_HPP_
+#define MLALIGNMENT_HPP_
+#include <vector>
+#include <string>
+#include <map>
+#include "Variant.hpp"
+class MLAlignment
+{
+public:
+	static const int INS=-1;
+	static const int DEL=-2;
+	static const int LO=-3;
+	static const int RO=-4;
+	MLAlignment()
+	{
+		relPos=-1;
+		ll=0.0;
+		llOn=0.0;
+		llOff=0.0;
+		offHap=false;
+		offHapHMQ=false;
+		numIndels=0;
+		numMismatch=0;
+		hr=-1;
+		hl=-1;
+
+	}
+
+	int relPos; // relative position of read wrt haplotype
+
+
+
+	int firstBase, lastBase; //first and last base of haplotype covered by the read
+	map<int, AlignedVariant> indels, snps;
+
+	map<int, bool> hapIndelCovered, hapSNPCovered; // indels and snps in the _haplotype_ covered by the read
+
+	double ll; // loglikelihood
+	double llOn, llOff; // without priors/mapping qualities taken into account
+	bool offHap, offHapHMQ; // read maps outside the haplotype window using the read mapping quality and an artificially high mapping quality, respectively
+	int hl, hr; // left and rightmost base on haplotype covered by the read
+	int numIndels, numMismatch;
+	int nBQT, nmmBQT; // number of aligned bases and number mismatching above threshold
+	double mLogBQ; // mean log BaseQuality
+
+	int nMMLeft, nMMRight;
+
+	string align;
+	vector<int> hpos;
+	operator double() const { return ll; };
+	void print()
+	{
+		cout << "relPos: " << relPos << " offHap: " << (int) offHap << " ll: " << ll << endl;
+	}
+};
+
+#endif /* MLALIGNMENT_HPP_ */
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f9237c4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,15 @@
+SAMTOOLDIR=/nfs/users/nfs_c/caa/source/samtools/
+SEQANDIR=seqan_library/
+
+CPPFLAGS= -DNDEBUG -D_IOLIB=2 -DMINREADS=2 -DDINDEL
+CXXFLAGS= -I$(SAMTOOLDIR) -I$(SEQANDIR) -I./  -Wno-deprecated  -O3 
+LDFLAGS= -L$(SAMTOOLDIR)  -lbam -lz -lboost_program_options -static 
+
+SRCSDINDEL=DInDel.cpp HapBlock.cpp HaplotypeDistribution.cpp ObservationModelFB.cpp GetCandidates.cpp Faster.cpp
+OBJSDINDEL=$(SRCSDINDEL:%.cpp=%.o)  
+
+dindel:$(OBJSDINDEL) Read.hpp DInDel.hpp HapBlock.hpp Haplotype.hpp HaplotypeDistribution.hpp MyBam.hpp GetCandidates.hpp Variant.hpp Fasta.hpp OutputData.hpp MLAlignment.hpp ObservationModelSeqAn.hpp VariantFile.hpp ReadIndelErrorModel.hpp Library.hpp Faster.hpp
+	$(CXX) -o $@ $(CXXFLAGS) $(DINDELFLAGS) $(OBJSDINDEL) $(LDFLAGS) 
+
+clean:
+	rm -f $(OBJSDINDEL) $(OBJSCOMPAREVARIANTS)  $(OBJSMAKEGLF)
diff --git a/MyBam.hpp b/MyBam.hpp
new file mode 100644
index 0000000..e15ad2f
--- /dev/null
+++ b/MyBam.hpp
@@ -0,0 +1,98 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * MyBam.hpp
+ *
+ *  Created on: Aug 27, 2009
+ *      Author: caa
+ */
+#ifndef MYBAM_HPP
+#define MYBAM_HPP
+#include <stdlib.h>
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <map>
+#include <boost/tuple/tuple.hpp>
+#include "faidx.h"
+#include "bam.h"
+using namespace std;
+
+class MyBam
+{
+public:
+	MyBam() { initialized=false; };
+	MyBam(const string &bamFile)
+	{
+		// load bam file
+		initialized=false;
+		init(bamFile);
+	}
+
+	MyBam(const MyBam & myBam)
+	{
+		initialized=false;
+		init(myBam.fileName);
+	}
+	int getTID(const string & str) const
+	{
+		map<string, int>::const_iterator it=strToTID.find(str);
+		if (it==strToTID.end()) {
+			throw string("Cannot find ID!");
+		} else return it->second;
+	}
+
+	void destroy()
+	{
+		if (initialized) {
+			bam_close(bf);
+			bam_header_destroy(bh);
+			bam_index_destroy(idx);
+		}
+		initialized=false;
+	}
+
+	~MyBam()
+	{
+		destroy();
+	}
+
+	bamFile bf;
+	bam_header_t *bh;
+	bam_index_t *idx;
+	string fileName;
+	private:
+	void init(const string & _fileName)
+	{
+
+		destroy();
+		fileName=_fileName;
+		bf=bam_open(fileName.c_str(),"r");
+		if (!bf) throw string("Cannot open BAM file.");
+		bh=bam_header_read(bf);
+		for (int nt=0;nt<bh->n_targets;nt++) {
+		//	cout << "target_name[" << nt << "]: " << string(bh->target_name[nt]) << endl;
+			strToTID[string(bh->target_name[nt])]=nt;
+		}
+
+		idx=bam_index_load(fileName.c_str());
+		initialized=true;
+	}
+	bool initialized;
+	map<string, int> strToTID;
+};
+
+
+#endif
diff --git a/ObservationModel.hpp b/ObservationModel.hpp
new file mode 100644
index 0000000..54e4c1d
--- /dev/null
+++ b/ObservationModel.hpp
@@ -0,0 +1,103 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * ObservationModel.hpp
+ *
+ *  Created on: Apr 2, 2009
+ *      Author: caa
+ */
+
+#ifndef OBSERVATIONMODEL_HPP_
+#define OBSERVATIONMODEL_HPP_
+#include <iostream>
+#include <string>
+
+using namespace std;
+class ObservationModelParameters
+{
+public:
+		ObservationModelParameters() {
+			modelType="probabilistic";
+			setDefaultValues();
+		}
+		ObservationModelParameters(const string & modelType) {
+			if (modelType=="threshold" || modelType=="probabilistic") this->modelType=modelType; else throw string("Model not supported.");
+			setDefaultValues();
+		}
+		void setDefaultValues()
+		{
+			pError=1e-4;
+			baseQualThreshold=0.995;
+			fixedBaseQual=0.99;
+			maxLengthIndel=10;
+			mapQualThreshold=100.0;
+			capMapQualFast=40.0;
+			scaleErr=0.95;
+			numE=3;
+			pMut=1e-4;
+			minOverlap=0;
+			numIndels=1;
+			indelDist="exponential";
+			maxLengthDel=maxLengthIndel;
+			pFirstgLO=0.01;
+			checkBaseQualThreshold = 0.95;
+
+			bMid=-1;
+			forceReadOnHaplotype=false;
+			mapUnmappedReads = false;
+
+			padCover=5;
+			maxMismatch=1;
+			maxTryHash=5;
+		}
+
+		void print()
+		{
+			cout << "\tmodelType: " << modelType << endl;
+			cout << "\tmaxLengthIndel: " << maxLengthIndel << " pError: " << pError << endl;
+			cout << "\tbaseQualThreshold: " << baseQualThreshold << " fixedBaseQual: " << fixedBaseQual << endl;
+			cout << "\tmapQualThreshold: " << mapQualThreshold << endl;
+			cout << "\tcapMapQualFast: " << capMapQualFast << endl;
+			cout << "\tminOverlap: " << minOverlap << endl;
+			cout << "\tscaleError: " << scaleErr << endl;
+			cout << "\tnumE: " << numE << endl;
+			cout << "\tpMut: " << pMut << endl;
+			cout << "\tnumIndels: " << numIndels << endl;
+			cout << "\tindelDistribution: " << indelDist << endl;
+			cout << "\tmaxLengthDel: " << maxLengthDel << " pError: " << pError << endl;
+			cout << "\tpFirstgLO: " << pFirstgLO << endl;
+			cout << "\tpadCover: " << padCover << endl;
+			cout << "\tmaxMismatch: " << maxMismatch << endl;
+			cout << "\tmaxTryHash: " << maxTryHash << endl;
+			cout << "\tcheckBaseQualThreshold: " << checkBaseQualThreshold << endl;
+			cout << "\tmapUnmappedReads: " << mapUnmappedReads << endl;
+			//cout << "\tlogLikThreshod: " << logLikThreshold << endl;
+		}
+		double pError, baseQualThreshold, fixedBaseQual, mapQualThreshold, capMapQualFast, scaleErr, pMut;
+		int maxLengthIndel, numE, minOverlap, numIndels, bMid;
+		double checkBaseQualThreshold;
+
+		string modelType, indelDist;
+		int maxLengthDel,maxTryHash;
+		bool forceReadOnHaplotype, mapUnmappedReads;
+		double pFirstgLO;
+		int padCover, maxMismatch;
+
+
+};
+
+
+
+#endif /* OBSERVATIONMODEL_HPP_ */
diff --git a/ObservationModelFB.cpp b/ObservationModelFB.cpp
new file mode 100644
index 0000000..189b3c3
--- /dev/null
+++ b/ObservationModelFB.cpp
@@ -0,0 +1,1829 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include <vector>
+#include <cmath>
+#include <set>
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+#include "ObservationModelFB.hpp"
+#include "Haplotype.hpp"
+#include "Read.hpp"
+#include "ReadIndelErrorModel.hpp"
+using namespace std;
+//#define DEBUGHMM
+
+ObservationModelFB::ObservationModelFB(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & _params) : read(r), params(_params)
+{
+
+	Init(_hap, hapStart);
+
+}
+
+void ObservationModelFB::Init(const Haplotype & _hap, uint32_t hapStart)
+{
+	hap=_hap.filtered();
+	memAllocated=false;
+	HMMInitialized=false;
+	HMMConsistent=false;
+	obsInitialized=false;
+	forwardDone=false;
+	backwardDone=false;
+	likelihoodComputed=false;
+	makeObsVector=false;
+	marginalsComputed=false;
+	if (params.maxLengthDel>(int) hap.size()) throw string("hapSize error.");
+
+
+	//bMid=_bMid; compute bMid position
+	uint32_t hapEnd=hapStart+hap.size();
+	uint32_t mReadStart=uint32_t(read.posStat.first);
+	uint32_t readEnd=mReadStart+uint32_t(read.size())-1;
+	uint32_t olStart, olEnd;
+	int mid;
+	if (read.isUnmapped()) {
+		bMid = int ( read.size()/2 );
+	} else {
+		if (mReadStart>hapEnd) {
+			bMid=int(read.size()/2);
+			/*
+			cout << "hapStart: " << hapStart << " hapEnd: " << hapEnd << endl;
+			cout << "mReadStart: " << mReadStart << " readEnd: " << readEnd << endl;
+			cout << "read.posStat.first: " << read.posStat.first << " read.posStat.second: " << read.posStat.second << endl;
+			cout << "Read: " << read << endl;
+			cout << "BMID error: read is not on haplotype. Changing ObservationModelParameters." << endl;
+			*/
+			//cerr << "BMIDE" << endl;
+			params.baseQualThreshold=0.0;
+		} else if (readEnd<hapStart) {
+			bMid=int(read.size()/2);
+			params.baseQualThreshold=0.0;
+			/*
+			cout << "hapStart: " << hapStart << " hapEnd: " << hapEnd << endl;
+			cout << "mReadStart: " << mReadStart << " readEnd: " << readEnd << endl;
+			cout << "read.posStat.first: " << read.posStat.first << " read.posStat.second: " << read.posStat.second << endl;
+			cout << "Read: " << read << endl;
+			cout << "BMID error: read is not on haplotype. Changing ObservationModelParameters" << endl;
+			*/
+			//cerr << "BMIDE" << endl;
+		} else {
+			olStart=(hapStart>mReadStart)?hapStart:mReadStart;
+			olEnd=(hapEnd>readEnd)?readEnd:hapEnd;
+			mid=(int(olEnd)-int(olStart))/2+int(olStart);
+			bMid=mid-int(mReadStart);
+		}
+	}
+
+	/*
+	if (bMid<0 || bMid>=int(read.size())) {
+		cout << "hapStart: " << hapStart << " readStart: " << mReadStart << " readEnd: " << readEnd << " olStart: " << olStart << " olEnd: " << olEnd << " bMid: " << bMid << " mid: " << mid << endl;
+		throw string("error");
+	}
+	*/
+
+	if (params.bMid!=-1) bMid=params.bMid;
+
+	if (bMid<0) { cout << "BMIDERROR" << endl; bMid=0; };
+	if (bMid>=int(read.size())) { cout << "BMIDERROR" << endl; bMid=int(read.size())-1; };
+
+	this->hapStart=hapStart;
+}
+
+
+void ObservationModelFB::changeHaplotype(const Haplotype & newHap)
+{
+	if (hap.size()!=newHap.filtered().size()) {
+		cout << "hap: " << hap << " newHap: " << newHap << endl;
+		throw string("New haplotype must have same length as old haplotype.");
+	}
+
+	hap=newHap.filtered();
+
+	obsInitialized=false;
+	HMMConsistent=false;
+	forwardDone=false;
+	backwardDone=false;
+	likelihoodComputed=false;
+
+}
+
+void ObservationModelFB::calcLikelihoodFromLastSlice()
+{
+	throw string("CHANGE ME! PRIOR NOT CALCULATED IN RIGHT PLACE");
+	if (likelihoodComputed) return;
+	double *alpha_l=alpha[bMid];
+	double *beta_l=beta[bMid];
+	double *obs_l=obs[bMid];
+	logLikelihood=0.0;
+	logLikelihoodNoPrior=0.0;
+	likOffHap.resize(2);
+	likOffHap[0]=0.0;
+	likOffHap[1]=0.0;
+
+	int y=0;
+	double max=0.0;
+	int maxidx=0;
+	for (int x=0;x<2*numS;x++, y++) {
+
+		double v=alpha_l[y]+obs_l[y]+beta_l[y];
+		double mar=exp(v);
+		if (mar>max) {
+			max=mar;
+			maxidx=x;
+		}
+		logLikelihood+=mar;
+		v=alpha_l[y]+obs_l[y]+beta_l[y]-prior[x];
+		double marnp=exp(v);
+		logLikelihoodNoPrior+=marnp;
+		if ((x%numS)==0) likOffHap[0]+=marnp; else if ((x%numS)!=ROState) likOffHap[1]+=marnp;
+	}
+	logLikelihood=log(logLikelihood);
+	logLikelihoodNoPrior=log(logLikelihoodNoPrior);
+	likOffHap[0]=log(likOffHap[0]);
+	likOffHap[1]=log(likOffHap[1]);
+
+	ml.ll=logLikelihood;
+	ml.llOff=likOffHap[0];
+	ml.llOn=likOffHap[1];
+	if ((maxidx%numS)==0 || (maxidx%numS)==ROState) {
+		ml.offHapHMQ=true;
+		ml.offHap=true;
+	}
+#ifdef DEBUGHMM
+	//cout << "calcLikelihoodFromLastSlice(): " << logLikelihood << endl;
+	//cout << "here: " << scientific << setprecision(10) <<  log(likOffHap[0]*exp(prior[0])+likOffHap[1]*exp(prior[1])) << " " << logLikelihood << endl;
+#endif
+	likelihoodComputed=true;
+}
+
+MLAlignment ObservationModelFB::calcLikelihood()
+{
+
+	initHMM();
+	setupReadObservationPotentials();
+	computeForwardMessages();
+	calcLikelihoodFromLastSlice();
+
+	return ml;
+}
+
+
+void ObservationModelFB::setupTransitionProbs()
+{
+	logpLOgLO=log(1.0-params.pFirstgLO);
+	logpFirstgLO=log(params.pFirstgLO);
+
+	numT=params.maxLengthDel+2;
+	logPTrans.resize(numT);
+	// logPTrans[1] is the transition corresponding to a normal one-base extension; the other entries model deletions
+	logPTrans[1]=log(1.0-params.pError);
+	double norm=0.0;
+	for (int x=1;x<numT;x++) if (x!=1) {
+		double p=-fabs(1.0-double(x));
+		logPTrans[x]=p;
+		norm+=exp(p);
+	}
+	norm=log(norm/params.pError);
+	for (int x=1;x<numT;x++) if (x!=1) logPTrans[x]-=norm;
+
+	// check norm
+	norm=0.0;
+	for (int x=1;x<numT;x++) norm+=exp(logPTrans[x]);
+	assert(fabs(norm-1.0)<1e-15);
+
+	logpInsgIns=-1.0;
+	logpNoInsgIns=log(1.0-exp(logpInsgIns));
+	logpInsgNoIns=log(params.pError);
+	logpNoInsgNoIns=log(1-params.pError);
+	/*
+	cout << "logpInsgIns: " << logpInsgIns << endl;
+	cout << "logpNoInsgIns: " << logpNoInsgIns << endl;
+	cout << "logpInsgNoIns: " << logpInsgNoIns << endl;
+	cout << "logpNoInsgNoIns: " << logpNoInsgNoIns << endl;
+	*/
+
+}
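+
+/* The normalization in setupTransitionProbs() above implements
+
+       P(advance by 1 haplotype base)                 = 1 - pError
+       P(advance by d bases), d = 2..maxLengthDel+1   = pError * exp(-(d-1)) / sum_{d'} exp(-(d'-1))
+
+   i.e. a normal one-base extension with probability 1-pError, with the remaining pError mass
+   spread over deletions of length d-1 using exponentially decaying weights, so that the
+   transition distribution sums to one (which is what the assert checks).  With the defaults
+   (pError = 1e-4, maxLengthDel = 10) a 1-base deletion receives roughly 0.63e-4 and every
+   additional deleted base costs a further factor of e. */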
+
+
+void ObservationModelFB::setupReadObservationPotentials()
+{
+	if (obsInitialized) return;
+	int b;
+	if (params.modelType=="probabilistic") {
+		for (b=0;b<readSize;b++) {
+			double rq=read.qual[b];
+			char nuc=read.seq[b];
+			double *obs_b=obs[b];
+
+			double *obs_b_ins=&obs_b[numS];
+			double *obs_b_noins=obs_b;
+			double pr=rq*(1.0-params.pMut);
+			double eq=log(.25+.75*pr);
+			double uq=log(.75+1e-10-.75*pr);
+
+
+			obs_b_ins[0]=eq; // left of haplotype
+			obs_b_ins[hapSize+1]=eq; // right of haplotype
+
+			obs_b_noins[0]=eq; // left of haplotype
+			obs_b_noins[hapSize+1]=eq; // right of haplotype
+
+			for (int y=0;y<hapSize;y++) {
+				// given an insertion in the read, assume a match so that low base qualities do not favor the insertion
+				obs_b_ins[y+1]=eq;
+				if (hap[y] == 'N' || hap[y]==nuc) {
+					obs_b_noins[y+1]=eq;
+				} else {
+					obs_b_noins[y+1]=uq;
+				}
+			}
+		}
+	} else throw string("Unsupported observation model.");
+
+
+
+
+
+	//throw string("Check priors!");
+	//cout << "prior[0]: " << prior[0] << endl;
+	if (params.forceReadOnHaplotype) {
+		forceOnHap();
+	}
+
+	obsInitialized=true;
+}
+
+void ObservationModelFB::computeBMidPrior(vector<double> & _prior, double mapQual)
+{
+	double pOffFirst;
+	double mq=1.0-mapQual;
+	if (-10.0*log10(mq)>params.mapQualThreshold) {
+		mq=pow(10.0,-params.mapQualThreshold/10.0);
+	}
+	pOffFirst=mq;
+
+	_prior=vector<double>(2*numS,0.0);
+	vector<double> pinsert = vector<double>(numS,0.0);
+	if (params.mapUnmappedReads && read.isPaired()) {
+		//cout << "read: " << bam1_qname(read.getBam()) << " read.pos: " << read.pos << " read.matePos: " << read.matePos << " read.mateLen: " << read.mateLen <<  " read.isUnmapped: " << read.isUnmapped() << " mateUnmapped: " << read.mateIsUnmapped() << " bmid: " << bMid << " hapStart: " << hapStart << " read.tid: " << read.getBam()->core.tid << " read.mtid: " << read.getBam()->core.mtid << endl;
+		//
+
+		if (!read.mateIsUnmapped() && read.mateLen != -1 && read.getBam()->core.tid == read.getBam()->core.mtid) {
+			if (read.mateIsReverse()) {
+				for ( int x=1;x<hapSize+1;x++) pinsert[x] = log(read.getLibrary().getProb(abs(hapStart+x-bMid-int(read.matePos+read.mateLen))));
+			} else {
+				for ( int x=1;x<hapSize+1;x++) pinsert[x] = log(read.getLibrary().getProb(abs(hapStart+x+readSize-bMid-int(read.matePos))));
+			}
+			pinsert[0] = log(read.getLibrary().getNinetyFifthPctProb());
+			// for (int x=0;x<hapSize+1;x++) cout << " " << pinsert[x];
+			// cout << endl;
+		}
+
+	}
+
+	for (size_t i=0;i<2;i++) {
+		double logpIns=(i==1)?(logpInsgNoIns):log(1.0-exp(logpInsgNoIns));
+		_prior[i*numS+0]=log(pOffFirst)+logpIns+pinsert[0];
+		_prior[i*numS+ROState]=-100.0;
+		for (int x=1;x<hapSize+1;x++) {
+			_prior[i*numS+x]=pinsert[x]+log((1.0-pOffFirst))+logpIns;
+		}
+	}
+
+}
+
+void ObservationModelFB::forceOnHap()
+{
+	for (int b=0;b<readSize;b++) {
+		obs[b][0]=-1000.0;
+		obs[b][ROState]=-1000.0;
+		obs[b][numS]=-1000.0;
+		obs[b][ROState+numS]=-1000.0;
+	}
+
+}
+
+void ObservationModelFB::getObsVector(int b, double *vec) const
+{
+	throw string("Not implemented");
+//	for (int y=0;y<4;y++) vec[y]=obsVector[(b<<2)+y];
+}
+
+void ObservationModelFB::initHMM()
+{
+	if (HMMInitialized) return;
+	hapSize=hap.size();
+	numS=hapSize+2;
+	readSize=read.seq.size();
+	ROState=hapSize+1;
+
+
+	allocateMemory();
+
+	for (int x=0;x<2*numS;x++) {
+			alpha[0][x]=0.0;
+			beta[readSize-1][x]=0.0;
+	}
+
+	HMMInitialized=true;
+	HMMConsistent=false;
+	forwardDone=false;
+	backwardDone=false;
+	likelihoodComputed=false;
+	marginalsComputed=false;
+
+	setupTransitionProbs();
+
+}
+
+void ObservationModelFB::allocateMemory()
+{
+	if (memAllocated) return; //throw string("Memory already allocated.");
+	mar.reserve(readSize);
+	obs.reserve(readSize);
+	alpha.reserve(readSize);
+	beta.reserve(readSize);
+
+	for (int b=0;b<readSize;b++) {
+		mar.push_back(new double[numS*2]);
+		obs.push_back(new double[numS*2]);
+		alpha.push_back(new double[numS*2]);
+		beta.push_back(new double[numS*2]);
+		xmar.push_back(new double[numS]);
+	}
+	if (makeObsVector) { obsVector=new double[4*readSize]; };
+	memAllocated=true;
+}
+
+void ObservationModelFB::deleteMemory()
+{
+	if (memAllocated) {
+		for (int b=0;b<readSize;b++) {
+			delete[] mar[b];
+			delete[] obs[b];
+			delete[] alpha[b];
+			delete[] beta[b];
+			delete[] xmar[b];
+		}
+		if (makeObsVector) delete[] obsVector;
+		memAllocated=false;
+	}
+
+}
+
+
+
+void ObservationModelFB::passMessageOneInc(double *alpha_l, const double *alpha_l_1, const double *obs_l_1)
+{
+	// P(x^l=x^{l-1}+d|x^{l-1})=(10^{logpSkip*d})
+	for (int x=0;x<2*numS;x++) alpha_l[x]=0.0;
+
+	// x^l, i^l=0 = > x^{l+1}, i^{l+1}=0
+	alpha_l[0]+=( exp(obs_l_1[0]+alpha_l_1[0]+logpLOgLO+logpNoInsgNoIns) );
+	alpha_l[1]+=( exp(obs_l_1[0]+alpha_l_1[0]+logpFirstgLO+logpNoInsgNoIns ) );
+	for (int x=1;x<=hapSize;x++ ) {
+		double tmp=obs_l_1[x]+alpha_l_1[x]+logpNoInsgNoIns;
+		for (int y=1;y<numT;y++) {
+			int newx=x+y;
+			if (newx>hapSize) newx=ROState;
+			alpha_l[newx]+=exp(tmp+logPTrans[y]);
+		}
+	}
+	// RO -> RO pROgRO=1.0;
+	alpha_l[ROState]+=exp(obs_l_1[ROState]+alpha_l_1[ROState]+logpNoInsgNoIns);
+
+	// x^l, i^l=0 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		alpha_l[numS+x]+=exp(obs_l_1[x]+alpha_l_1[x]+logpInsgNoIns);
+	}
+
+	// x^l, i^l=1 = > x^{l+1}, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		alpha_l[numS+x]+=exp(obs_l_1[numS+x]+alpha_l_1[numS+x]+logpInsgIns);
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l+1, i^{l+1}=0
+	alpha_l[0]+=exp(obs_l_1[numS+0]+alpha_l_1[numS+0]+logpNoInsgIns); // cannot go from insertion on to the haplotype
+	for (int x=1;x<=hapSize+1;x++ ) {
+		int newx=x+1; if (newx>ROState) newx=ROState;
+		alpha_l[newx]+=exp(obs_l_1[numS+x]+alpha_l_1[numS+x]+logpNoInsgIns);
+	}
+
+
+	// convert back to log
+
+	for (int x=0;x<2*numS;x++) alpha_l[x]=log(alpha_l[x]);
+}
+
+// doing a pass for a P(X_{l+1}|X_l) potential where the next state is lower than current state
+
+void ObservationModelFB::passMessageOneDec(double *alpha_l, const double *alpha_l_1,const double *obs_l_1)
+{
+	// P(x^l=x^{l-1}+d|x^{l-1})=(10^{logpSkip*d})
+	for (int x=0;x<2*numS;x++) alpha_l[x]=0.0;
+
+	// x^l, i^l=0 = > x^{l+1}, i^{l+1}=0
+	alpha_l[ROState]+=( exp(obs_l_1[ROState]+alpha_l_1[ROState]+logpLOgLO+logpNoInsgNoIns) );
+	alpha_l[hapSize]+=( exp(obs_l_1[ROState]+alpha_l_1[ROState]+logpFirstgLO+logpNoInsgNoIns ) );
+	for (int x=1;x<=hapSize;x++ ) {
+		double tmp=obs_l_1[x]+alpha_l_1[x]+logpNoInsgNoIns;
+		for (int y=1;y<numT;y++) {
+			int newx=x-y;
+			if (newx<0) newx=0;
+			alpha_l[newx]+=exp(tmp+logPTrans[y]);
+		}
+	}
+	// RO -> RO pROgRO=1.0;
+	alpha_l[0]+=exp(obs_l_1[0]+alpha_l_1[0]+logpNoInsgNoIns);
+
+	// x^l, i^l=0 = > x^{l+1}=x^l-1, i^{l+1}=1
+	alpha_l[numS+ROState]+=( exp(obs_l_1[ROState]+alpha_l_1[ROState]+logpLOgLO+logpInsgNoIns ) );
+	alpha_l[numS+hapSize]+=( exp(obs_l_1[ROState]+alpha_l_1[ROState]+logpFirstgLO+logpInsgNoIns ) );
+	for (int x=0;x<=hapSize;x++ ) {
+		int newx=x-1; if (newx<0) newx=0;
+		alpha_l[numS+newx]+=exp(obs_l_1[x]+alpha_l_1[x]+logpInsgNoIns);
+	}
+
+
+	/*
+	for (int x=0;x<=hapSize+1;x++ ) {
+		alpha_l[numS+x]+=exp(obs_l_1[x]+alpha_l_1[x]+logpInsgNoIns);
+	}
+	*/
+
+	// x^l, i^l=1 = > x^{l+1}, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		alpha_l[numS+x]+=exp(obs_l_1[numS+x]+alpha_l_1[numS+x]+logpInsgIns);
+	}
+
+
+
+	// x^l, i^l=1 = > x^{l+1}=x^l, i^{l+1}=0
+	for (int x=0;x<=hapSize+1;x++ ) {
+		alpha_l[x]+=exp(obs_l_1[numS+x]+alpha_l_1[numS+x]+logpNoInsgIns);
+	}
+
+	/*
+	alpha_l[ROState]+=exp(obs_l_1[numS+ROState]+alpha_l_1[numS+ROState]+logpNoInsgIns); // cannot go from insertion on to the haplotype
+	for (int x=0;x<=hapSize;x++ ) {
+		int newx=x-1; if (newx<0) newx=0;
+		alpha_l[newx]+=exp(obs_l_1[numS+x]+alpha_l_1[numS+x]+logpNoInsgIns);
+	}
+	*/
+
+
+	// convert back to log
+	for (int x=0;x<2*numS;x++) alpha_l[x]=log(alpha_l[x]);
+}
+
+void ObservationModelFB::passMessageTwoInc(double *beta_l, const double *beta_l_1,const double *obs_l_1)
+{
+
+	// x^l, i^l=0 = > x^{l+1}, i^{l+1}=0
+	beta_l[0]=( exp(obs_l_1[0]+beta_l_1[0]+logpLOgLO+logpNoInsgNoIns) ) + ( exp(obs_l_1[1]+beta_l_1[1]+logpFirstgLO+logpNoInsgNoIns ) );
+	for (int x=1;x<=hapSize;x++ ) {
+		// double tmp=beta_l_1[x]+logpNoInsgNoIns;
+		beta_l[x]=0.0;
+		for (int y=1;y<numT;y++) {
+			int newx=x+y;
+			if (newx>hapSize) newx=ROState;
+			beta_l[x]+=exp(logPTrans[y]+logpNoInsgNoIns+beta_l_1[newx]+obs_l_1[newx]);
+		}
+	}
+
+	// RO -> RO pROgRO=1.0;
+	beta_l[ROState]=exp(obs_l_1[ROState]+beta_l_1[ROState]+logpNoInsgNoIns);
+
+	//
+	// x^l, i^l=0 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		beta_l[x]+=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgNoIns);
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		beta_l[numS+x]=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns);
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l+1, i^{l+1}=0
+	beta_l[0+numS]+=exp(obs_l_1[0]+beta_l_1[0]+logpNoInsgIns); // cannot go from insertion on to the haplotype
+	for (int x=1;x<=hapSize+1;x++ ) {
+		int newx=x+1; if (newx>ROState) newx=ROState;
+		beta_l[numS+x]+=exp(obs_l_1[newx]+beta_l_1[newx]+logpNoInsgIns);
+	}
+
+
+	// convert back to log
+
+	for (int x=0;x<2*numS;x++) beta_l[x]=log(beta_l[x]);
+}
+
+
+
+void ObservationModelFB::passMessageTwoDec(double *beta_l, const double *beta_l_1,const double *obs_l_1)
+{
+	// P(x^l=x^{l-1}+d|x^{l-1})=(10^{logpSkip*d})
+	//for (int x=0;x<2*numS;x++) beta_l[x]=0.0;
+
+	// x^l, i^l=0 = > x^{l+1}, i^{l+1}=0
+	beta_l[ROState]=( exp(obs_l_1[ROState]+beta_l_1[ROState]+logpLOgLO+logpNoInsgNoIns) )+( exp(obs_l_1[hapSize]+beta_l_1[hapSize]+logpFirstgLO+logpNoInsgNoIns ) );
+	for (int x=1;x<=hapSize;x++ ) {
+		beta_l[x]=0.0;
+		for (int y=1;y<numT;y++) {
+			int newx=x-y;
+			if (newx<0) newx=0;
+			beta_l[x]+=exp(obs_l_1[newx]+logPTrans[y]+beta_l_1[newx]+logpNoInsgNoIns);
+		}
+	}
+	// RO -> RO pROgRO=1.0;
+	beta_l[0]=exp(obs_l_1[0]+beta_l_1[0]+logpNoInsgNoIns);
+
+	// x^l, i^l=0 = > x^{l+1}=x^l-1, i^{l+1}=1
+	beta_l[ROState]+=(exp(obs_l_1[numS+ROState]+beta_l_1[numS+ROState]+logpLOgLO+logpInsgNoIns)+exp(obs_l_1[numS+hapSize]+beta_l_1[numS+hapSize]+logpFirstgLO+logpInsgNoIns)); // cannot go from insertion on to the haplotype
+	for (int x=0;x<=hapSize;x++) {
+		int newx=x-1; if (newx<0) newx=0;
+		beta_l[x]+=exp(obs_l_1[numS+newx]+beta_l_1[numS+newx]+logpInsgNoIns);
+	}
+
+	/*
+	 * from forward OneDec
+	alpha_l[numS+ROState]+=( exp(obs_l_1[ROState]+alpha_l_1[ROState]+logpLOgLO+logpInsgNoIns ) );
+	alpha_l[numS+hapSize]+=( exp(obs_l_1[ROState]+alpha_l_1[ROState]+logpFirstgLO+logpInsgNoIns ) );
+	for (int x=0;x<=hapSize;x++ ) {
+		int newx=x-1; if (newx<0) newx=0;
+		alpha_l[numS+newx]+=exp(obs_l_1[x]+alpha_l_1[x]+logpInsgNoIns);
+	}
+	*/
+	/*
+	for (int x=0;x<=hapSize+1;x++ ) {
+		beta_l[x]+=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgNoIns);
+	}
+	*/
+
+	// x^l, i^l=1 = > x^{l+1}, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		beta_l[numS+x]=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns);
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l, i^{l+1}=0
+	for (int x=0;x<=hapSize+1;x++ ) {
+		beta_l[numS+x]+=exp(obs_l_1[x]+beta_l_1[x]+logpNoInsgIns);
+	}
+	// convert back to log
+	for (int x=0;x<2*numS;x++) beta_l[x]=log(beta_l[x]);
+}
+
+
+void ObservationModelFB::computeForwardMessages()
+{
+	if (forwardDone) return;
+	/*
+	for (int b=1;b<readSize;b++) {
+			passMessageTwoDec(alpha[b], alpha[b-1], obs[b-1]);
+		}
+	*/
+
+	for (int b=1;b<=bMid;b++) {
+		passMessageTwoDec(alpha[b], alpha[b-1], obs[b-1]);
+	}
+	for (int b=readSize-1;b>bMid;b--) {
+	     passMessageTwoInc(beta[b-1], beta[b], obs[b]);
+	}
+
+
+	forwardDone=true;
+}
+
+void ObservationModelFB::computeBackwardMessages()
+{
+	if (backwardDone) return;
+	/*
+	for (int b=readSize-1;b>0;b--) {
+		passMessageOneDec(beta[b-1], beta[b], obs[b]);
+	}
+	*/
+
+
+	for (int b=bMid;b>0;b--) {
+		passMessageOneDec(beta[b-1], beta[b], obs[b]);
+	}
+	for (int b=bMid+1;b<readSize;b++) {
+		passMessageOneInc(alpha[b], alpha[b-1], obs[b-1]);
+	}
+
+
+	backwardDone=true;
+}
+
+bool ObservationModelFB::_badValue(double v)
+{
+	return isnan(v) || isinf(v);
+}
+
+bool ObservationModelFB::hasErrors()
+{
+	for (int l=0;l<readSize;l++) {
+		for (int x=0;x<hapSize+2;x++) {
+			if (_badValue(mar[l][x]) || _badValue(alpha[l][x]) || _badValue(beta[l][x]) ) {
+				return true;
+			}
+		}
+	}
+	return false;
+}
+
+
+void ObservationModelFB::computeMarginals()
+{
+	// also normalizes
+	if (marginalsComputed) return;
+#ifdef DEBUGHMM
+	vector<double> logL(readSize);
+	cout << "log-likelihoods: ";
+#endif
+	if (!HMMInitialized) initHMM();
+	if (!obsInitialized) setupReadObservationPotentials();
+	if (!forwardDone) computeForwardMessages();
+	// if (!likelihoodComputed) calcLikelihoodFromLastSlice();
+	if (!backwardDone) computeBackwardMessages();
+
+	for (int l=0;l<readSize;l++) {
+		double sum=0.0;
+		for (int x=0;x<2*numS;x++) {
+			mar[l][x]=exp(alpha[l][x]+beta[l][x]+obs[l][x]);
+			sum+=mar[l][x];
+		}
+		for (int x=0;x<2*numS;x++) mar[l][x]/=sum;
+		//if (l==0) logLikelihood=log(sum);
+#ifdef DEBUGHMM
+		logL[l]=log(sum);
+		cout << "[" << l << "," << scientific << setprecision(10) << logL[l] << "]";
+
+#endif
+	}
+#ifdef DEBUGHMM
+	cout << "end" << endl;
+#endif
+	HMMConsistent=true;
+	likelihoodComputed=true;
+	marginalsComputed=true;
+#ifdef DEBUGHMM
+	set<int> sl;
+	for (int l=0;l<readSize;l++) sl.insert(int(logL[l]*1000.0));
+	if (sl.size()!=1) {
+		//printMarginals();
+		cout << "inconsistent" << endl;
+		//throw string("HMM inconsistent!");
+	}
+#endif
+}
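+
+/*
+   In sum-product terms the loop above computes, for read base l and state x,
+
+       mar[l][x] = alpha[l][x] * obs[l][x] * beta[l][x] / Z_l
+
+   with all factors stored as logs (hence the exp()).  For a consistent HMM
+   the normaliser Z_l is the same for every slice, which is exactly what the
+   DEBUGHMM block checks by comparing log(sum) across l.
+*/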
+
+void ObservationModelFB::computeXMarginals()
+{
+
+	if (!marginalsComputed) computeMarginals();
+	for (int b=0;b<readSize;b++) {
+		double *_mar=mar[b];
+		double *_xmar=xmar[b];
+		for (int x=0;x<numS;x++) _xmar[x]=0.0;
+
+		int y=0;
+		for (int ins=0;ins<2;ins++) {
+			for (int x=0;x<numS;x++) _xmar[x]+=_mar[y++];
+		}
+	}
+	cout.precision(2);
+
+}
+
+void ObservationModelFB::printMarginalsInt( const vector<double*> & pot)
+{
+	cout.precision(2);
+
+	for (size_t b=0;b<pot.size();b++) {
+		cout << "base["  << b << "]: " << endl;
+		int y=0;
+		for (int ins=0;ins<2;ins++) {
+			cout << " ins: " << ins <<  " ";
+			for (int x=0;x<numS;x++) {
+				stringstream os; os<<fixed <<  (pot[b][y++]);
+				string s=os.str();
+				cout << string(s,0,5) <<" " ;
+			}
+
+			cout << endl;
+		}
+	}
+
+}
+
+void ObservationModelFB::printMarginals()
+{
+	cout << "read: " << read.seq <<  " hap: " << hap.seq << endl;
+	cout << "logLikelihood: " << logLikelihood << endl;
+
+	/*
+	cout << "obs: " << endl;
+	printMarginalsInt(obs);
+	cout << "alpha: " << endl;
+	printMarginalsInt(alpha);
+	cout << "beta: " << endl;
+	printMarginalsInt(beta);
+	*/
+
+	cout << "obs: " << endl;
+	printMarginalsInt(obs);
+	cout << "alpha: " << endl;
+	printMarginalsInt(alpha);
+	cout << "mar: " << endl;
+	printMarginalsInt(mar);
+}
+
+
+void ObservationModelFB::printStatistics()
+{
+	cout << "bMid: " << bMid << endl;
+	cout << "pTrans: "; for (int x=0;x<numT;x++) cout << exp(logPTrans[x]) << " "; cout << endl;
+}
+
+void ObservationModelFB::printAlignment(size_t hapScrPos)
+{
+	if (!HMMConsistent) computeMarginals();
+	computeXMarginals();
+	// for every base determine most likely position
+	vector<double> maxP(readSize,-HUGE_VAL), entropy(readSize), obsLik(readSize);
+	vector<int> maxIdx(readSize, 0);
+
+	int min=ROState+1;
+	bool isIncreasing=true;
+	for (int b=0;b<readSize;b++) {
+		double max=-HUGE_VAL;
+		int idx=0;
+		double *m=xmar[b];
+		entropy[b]=0.0;
+		for (int s=0;s<hapSize+2;s++) {
+			entropy[b]+=m[s]*exp(m[s]);
+			if (m[s]>max) { max=m[s]; idx=s; };
+		}
+		maxP[b]=exp(max);
+		maxIdx[b]=idx;
+		if (b && maxIdx[b]!=0 && maxIdx[b]!=ROState) if (maxIdx[b]-maxIdx[b-1]!=1) isIncreasing=false;
+		if (idx<min) min=idx;
+
+		obsLik[b]=obs[b][idx];
+		if (b==bMid) obsLik[b]-=prior[idx];
+
+		/*
+		if (idx!=0 && idx!=ROState) {
+			char hn=hap.seq[idx-1];
+			char rn=read.seq[b];
+
+			//if (hn!=rn) cout << "b: " << b << " idx: " << idx << " " << hn << " " << rn << " ol: " << obsLik[b] << endl;
+			//if (hn==rn && obsLik[b]<-.5) cout << "b: " << b << " idx: " << idx << " " << hn << " " << rn << " ol: " << obsLik[b] << endl;
+		}
+		*/
+	}
+
+	// number of bases left and right off the haplotype
+	//printMarginalsInt(mar);
+	size_t nLeft=0, nRight=0;
+	for (int b=0;b<readSize;b++) {
+		if (maxIdx[b]==0) nLeft++; else if (maxIdx[b]==ROState) nRight++;
+	}
+
+
+	size_t rskip=0;
+	if (nLeft>hapScrPos) { rskip=1+nLeft-hapScrPos; nLeft=hapScrPos; };
+
+	size_t offset=hapScrPos-nLeft;
+
+	// print aligned read
+
+	string readString(nLeft+nRight+hapSize+readSize,' ');
+	size_t idxL=0, idxR=0;
+	for (int b=rskip;b<readSize;b++) {
+		char nuc=read.seq[b];
+		if (read.qual[b]<params.baseQualThreshold) { nuc=::tolower(nuc); };
+		if (maxIdx[b]==0) readString[idxL++]=nuc;
+		else if (maxIdx[b]==ROState) readString[nLeft+hapSize+idxR++]=nuc;
+		else readString[nLeft+maxIdx[b]-1]=nuc;
+
+	}
+
+	ostringstream os(ostringstream::out);
+	os << isIncreasing << " " << logLikelihood << " " << (maxIdx[bMid]!=0 && maxIdx[bMid]!=ROState) << " ";
+	size_t s=os.str().size();
+	size_t rs,ds;
+	if (offset>=s) {
+		rs=0;
+		ds=offset-s;
+	} else {
+		rs=s-offset;
+		ds=0;
+	}
+
+
+	cout << os.str() << string(ds,' ') << string(readString, rs, readString.size()) << endl;
+	/*
+	cout << "obsLik:" << string(offset+min-1-7,' '); for (int x=0;x<readSize;x++) cout << int(round(-obsLik[x])); cout << endl;
+	cout << "-logq: " << string(offset+min-1-7,' '); for (int x=0;x<readSize;x++) cout << int(round(-log(read.qual[x]))); cout << endl;
+	cout << "leq:   " << string(offset+min-1-7,' ');
+	for (int b=0;b<readSize;b++) {
+		double pr=read.mapQual*read.qual[b];
+		double eq=log(.25+.75*pr);
+		cout << int(round(-eq));
+	}
+	cout << endl;
+	cout << "luq:   " << string(offset+min-1-7,' ');
+	for (int b=0;b<readSize;b++) {
+			double pr=read.mapQual*read.qual[b];
+			double uq=log(1e-16+1.0-pr)+log(.25);
+			cout << int(round(-uq));
+	}
+	cout << endl;
+
+	for (int b=0;b<readSize;b++) cout << maxIdx[b] << " ";
+	cout << " isIncreasing: " << isIncreasing << " hapSize:" << hapSize << " ROState: " << ROState << endl;
+	*/
+}
+
+ObservationModelFB::~ObservationModelFB()
+{
+	deleteMemory();
+}
+
+ObservationModelFBMax::ObservationModelFBMax(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & _params)
+{
+
+	read=r;
+	params=_params;
+	Init(_hap, hapStart);
+
+}
+
+
+inline void ObservationModelFBMax::updateMax(double & destValue, int & destIdx, const double newValue, const int  newIdx)
+{
+	if (newValue>destValue+EPS) {
+		destValue=newValue;
+		destIdx=newIdx;
+	}
+	else if (newValue>=destValue && newValue<=destValue+1e-5 && destIdx>newIdx) {
+		destValue=newValue;
+		destIdx=newIdx;
+	}
+
+}
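+
+/*
+   updateMax() maintains the running (value, back-pointer) pair of the
+   max-product recursion: strictly larger values (by more than EPS) always
+   win, and near-ties (within 1e-5) are broken towards the smaller state
+   index, apparently to keep the MAP path deterministic.  Worked example:
+   starting from (-HUGE_VAL, -), applying (-3.0, idx 7) and then
+   (-3.0, idx 4) leaves the pair at (-3.0, idx 4).
+*/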
+
+
+// note that max-product works completely in the log-domain
+void ObservationModelFBMax::passMessageTwoInc(double *beta_l, const double *beta_l_1,const double *obs_l_1, int *bt_l)
+{
+
+	// x^l, i^l=0 => x^{l+1}, i^{l+1}=0
+	//beta_l[0]=( exp(obs_l_1[0]+beta_l_1[0]+logpLOgLO+logpNoInsgNoIns) ) + ( exp(obs_l_1[1]+beta_l_1[1]+logpFirstgLO+logpNoInsgNoIns ) );
+	beta_l[0]=-HUGE_VAL;
+	updateMax(beta_l[0], bt_l[0], obs_l_1[0]+beta_l_1[0]+logpLOgLO+logpNoInsgNoIns, 0);
+	updateMax(beta_l[0], bt_l[0], obs_l_1[1]+beta_l_1[1]+logpFirstgLO+logpNoInsgNoIns, 1);
+
+	for (int x=1;x<=hapSize;x++ ) {
+		// double tmp=beta_l_1[x]+logpNoInsgNoIns;
+		beta_l[x]=-HUGE_VAL;
+		for (int y=1;y<numT;y++) {
+			int newx=x+y;
+			if (newx>hapSize) newx=ROState;
+			//beta_l[x]+=exp(logPTrans[y]+logpNoInsgNoIns+beta_l_1[newx]+obs_l_1[newx]);
+			updateMax(beta_l[x], bt_l[x], logPTrans[y]+logpNoInsgNoIns+beta_l_1[newx]+obs_l_1[newx], newx);
+		}
+	}
+
+	// RO -> RO pROgRO=1.0;
+	//beta_l[ROState]=exp(obs_l_1[ROState]+beta_l_1[ROState]+logpNoInsgNoIns);
+	beta_l[ROState]=-HUGE_VAL;
+	updateMax(beta_l[ROState], bt_l[ROState],obs_l_1[ROState]+beta_l_1[ROState]+logpNoInsgNoIns, ROState);
+
+	//
+	// x^l, i^l=0 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		//beta_l[x]+=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgNoIns);
+		updateMax(beta_l[x], bt_l[x], obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgNoIns, numS+x);
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		//beta_l[numS+x]=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns);
+		beta_l[numS+x]=obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns;
+		bt_l[numS+x]=numS+x;
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l+1, i^{l+1}=0
+	//beta_l[0+numS]+=exp(obs_l_1[0]+beta_l_1[0]+logpNoInsgIns); // cannot go from insertion on to the haplotype
+	updateMax(beta_l[0+numS], bt_l[0+numS], obs_l_1[0]+beta_l_1[0]+logpNoInsgIns, 0);
+	for (int x=1;x<=hapSize+1;x++ ) {
+		int newx=x+1; if (newx>ROState) newx=ROState;
+		//beta_l[numS+x]+=exp(obs_l_1[newx]+beta_l_1[newx]+logpNoInsgIns);
+		updateMax(beta_l[numS+x], bt_l[numS+x], obs_l_1[newx]+beta_l_1[newx]+logpNoInsgIns, newx);
+	}
+
+
+	// convert back to log
+
+	// for (int x=0;x<2*numS;x++) beta_l[x]=log(beta_l[x]);
+}
+
+
+
+
+/*
+void ObservationModelFBMax::passMessageTwoDec(double *beta_l, const double *beta_l_1,const double *obs_l_1, int *bt_l)
+{
+	// P(x^l=x^{l-1}+d|x^{l-1})=(10^{logpSkip*d})
+	//for (int x=0;x<2*numS;x++) beta_l[x]=0.0;
+
+	// x^l, i^l=0 = > x^{l+1}, i^{l+1}=0
+	//beta_l[ROState]=( exp(obs_l_1[ROState]+beta_l_1[ROState]+logpLOgLO+logpNoInsgNoIns) )+( exp(obs_l_1[hapSize]+beta_l_1[hapSize]+logpFirstgLO+logpNoInsgNoIns ) );
+
+	beta_l[ROState]=-HUGE_VAL;
+	updateMax(beta_l[ROState], bt_l[ROState], obs_l_1[ROState]+beta_l_1[ROState]+logpLOgLO+logpNoInsgNoIns, ROState);
+	updateMax(beta_l[ROState], bt_l[ROState], obs_l_1[hapSize]+beta_l_1[hapSize]+logpFirstgLO+logpNoInsgNoIns, hapSize);
+
+
+	for (int x=1;x<=hapSize;x++ ) {
+		beta_l[x]=-HUGE_VAL;
+		for (int y=1;y<numT;y++) {
+			int newx=x-y;
+			if (newx<0) newx=0;
+			//beta_l[x]+=exp(obs_l_1[newx]+logPTrans[y]+beta_l_1[newx]+logpNoInsgNoIns);
+			updateMax(beta_l[x], bt_l[x], obs_l_1[newx]+logPTrans[y]+beta_l_1[newx]+logpNoInsgNoIns, newx);
+		}
+	}
+	// RO -> RO pROgRO=1.0;
+	//beta_l[0]=exp(obs_l_1[0]+beta_l_1[0]+logpNoInsgNoIns);
+	beta_l[0]=obs_l_1[0]+beta_l_1[0]+logpNoInsgNoIns;
+	bt_l[0]=0;
+
+	// x^l, i^l=0 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		//beta_l[x]+=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgNoIns);
+		updateMax(beta_l[x], bt_l[x], obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgNoIns, numS+x);
+	}
+
+	// x^l, i^l=1 = > x^{l+1}, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		//beta_l[numS+x]=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns);
+		beta_l[numS+x]=obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns;
+		bt_l[numS+x]=numS+x;
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l+1, i^{l+1}=0
+	//beta_l[numS+ROState]+=exp(obs_l_1[ROState]+beta_l_1[ROState]+logpNoInsgIns); // cannot go from insertion on to the haplotype
+	updateMax(beta_l[numS+ROState], bt_l[numS+ROState], obs_l_1[ROState]+beta_l_1[ROState]+logpNoInsgIns, ROState);
+
+	for (int x=0;x<=hapSize;x++ ) {
+		int newx=x-1; if (newx<0) newx=0;
+		//beta_l[numS+x]+=exp(obs_l_1[newx]+beta_l_1[newx]+logpNoInsgIns);
+		updateMax(beta_l[numS+x], bt_l[numS+x],obs_l_1[newx]+beta_l_1[newx]+logpNoInsgIns, newx );
+	}
+	// convert back to log
+	// for (int x=0;x<2*numS;x++) beta_l[x]=log(beta_l[x]);
+}
+*/
+void ObservationModelFBMax::passMessageTwoDec(double *beta_l, const double *beta_l_1,const double *obs_l_1, int *bt_l)
+{
+	// P(x^l=x^{l-1}+d|x^{l-1})=(10^{logpSkip*d})
+
+	// x^l, i^l=0 = > x^{l+1}, i^{l+1}=0
+	beta_l[ROState]=-HUGE_VAL;
+	updateMax(beta_l[ROState], bt_l[ROState], obs_l_1[ROState]+beta_l_1[ROState]+logpLOgLO+logpNoInsgNoIns, ROState);
+	updateMax(beta_l[ROState], bt_l[ROState], obs_l_1[hapSize]+beta_l_1[hapSize]+logpFirstgLO+logpNoInsgNoIns, hapSize);
+
+	for (int x=1;x<=hapSize;x++ ) {
+		beta_l[x]=-HUGE_VAL;
+		for (int y=1;y<numT;y++) {
+			int newx=x-y;
+			if (newx<0) newx=0;
+			//beta_l[x]+=exp(obs_l_1[newx]+logPTrans[y]+beta_l_1[newx]+logpNoInsgNoIns);
+			updateMax(beta_l[x], bt_l[x], obs_l_1[newx]+logPTrans[y]+beta_l_1[newx]+logpNoInsgNoIns, newx);
+		}
+	}
+	// RO -> RO pROgRO=1.0;
+	//beta_l[0]=exp(obs_l_1[0]+beta_l_1[0]+logpNoInsgNoIns);
+	beta_l[0]=obs_l_1[0]+beta_l_1[0]+logpNoInsgNoIns;
+	bt_l[0]=0;
+
+
+	// x^l, i^l=0 = > x^{l+1}=x^l-1, i^{l+1}=1
+	//beta_l[ROState]+=(exp(obs_l_1[numS+ROState]+beta_l_1[numS+ROState]+logpLOgLO+logpInsgNoIns)+exp(obs_l_1[numS+hapSize]+beta_l_1[numS+hapSize]+logpFirstgLO+logpInsgNoIns)); // cannot go from insertion on to the haplotype
+	updateMax(beta_l[ROState], bt_l[ROState],obs_l_1[numS+ROState]+beta_l_1[numS+ROState]+logpLOgLO+logpInsgNoIns, numS+ROState);
+	updateMax(beta_l[ROState], bt_l[ROState],obs_l_1[numS+hapSize]+beta_l_1[numS+hapSize]+logpFirstgLO+logpInsgNoIns, numS+hapSize);
+
+	for (int x=0;x<=hapSize;x++) {
+		int newx=x-1; if (newx<0) newx=0;
+		//beta_l[x]+=exp(obs_l_1[numS+newx]+beta_l_1[numS+newx]+logpInsgNoIns);
+		updateMax(beta_l[x], bt_l[x],obs_l_1[numS+newx]+beta_l_1[numS+newx]+logpInsgNoIns, numS+newx);
+	}
+
+
+
+	// x^l, i^l=1 = > x^{l+1}, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+	//	beta_l[numS+x]=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns);
+		beta_l[numS+x]=obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns;
+		bt_l[numS+x]=numS+x;
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l, i^{l+1}=0
+	for (int x=0;x<=hapSize+1;x++ ) {
+	//	beta_l[numS+x]+=exp(obs_l_1[x]+beta_l_1[x]+logpNoInsgIns);
+		updateMax(beta_l[numS+x], bt_l[numS+x],obs_l_1[x]+beta_l_1[x]+logpNoInsgIns, x);
+	}
+	// convert back to log
+	//for (int x=0;x<2*numS;x++) beta_l[x]=log(beta_l[x]);
+}
+
+
+void ObservationModelFBMax::runHMM()
+{
+	if (HMMConsistent) return;
+	initHMM();
+	setupReadObservationPotentials();
+	computeForwardMessages();
+	calcLikelihoodFromLastSlice();
+	computeMAPState();
+	HMMConsistent=true;
+}
+
+MLAlignment ObservationModelFBMax::calcLikelihood()
+{
+	runHMM();
+	reportVariants();
+	return ml;
+}
+
+void ObservationModelFBMax::calcLikelihoodFromLastSlice()
+{
+	if (likelihoodComputed) return;
+	double *alpha_l=alpha[bMid];
+	double *beta_l=beta[bMid];
+	double *obs_l=obs[bMid];
+	logLikelihood=-HUGE_VAL;
+	logLikelihoodNoPrior=0.0;
+	likOffHap.resize(2);
+	likOffHap[0]=-HUGE_VAL;
+	likOffHap[1]=-HUGE_VAL;
+
+	int mapStateRMQ=0;
+
+	double llHMQ = -HUGE_VAL;
+
+	vector<double> priorRMQ, priorHMQ;
+	computeBMidPrior(priorRMQ, read.mapQual);
+	computeBMidPrior(priorHMQ, 1.0-1e-10);
+
+	int y=0;
+	for (int x=0;x<2*numS;x++, y++) {
+
+		double v=alpha_l[y]+obs_l[y]+beta_l[y]+priorRMQ[y];
+		if (v>logLikelihood+EPS) {
+			logLikelihood=v;
+			mapStateRMQ=x;
+		}
+
+		if ((x%numS)==0) {
+			if (v>likOffHap[0]) likOffHap[0]=v;
+		} else if ((x%numS)!=ROState) {
+			if (v>likOffHap[1]) likOffHap[1]=v;
+		}
+
+		v=alpha_l[y]+obs_l[y]+beta_l[y]+priorHMQ[y];
+		if (v>llHMQ+EPS) {
+			llHMQ=v;
+			mapState[bMid]=x;
+		}
+
+
+	}
+	//cout << "read: " << bam1_qname(this->read.getBam()) << " read.pos: " << read.pos << " matePos: " << this->read.matePos << " lib: " <<  " prior[" << bMid << ":" << mapState[bMid] << "]: " << priorRMQ[mapStateRMQ] << endl;
+	//cout << "lib: " << this->read.getLibraryName() << endl;
+	ml.ll=logLikelihood;
+	if ((mapState[bMid]%numS)==0 || (mapState[bMid]%numS)==ROState) {
+		ml.offHapHMQ=true;
+	}else {
+		ml.offHapHMQ=false;
+	}
+
+	if ((mapStateRMQ%numS)==0 || (mapStateRMQ%numS)==ROState) {
+		ml.offHap=true;
+	}else {
+		ml.offHap=false;
+	}
+
+
+	ml.llOff=likOffHap[0];
+	ml.llOn=likOffHap[1];
+
+	// now recompute mapState: we want to only show alignments to the haplotype
+
+#ifdef DEBUGHMM
+	//cout << "calcLikelihoodFromLastSlice(): " << logLikelihood << endl;
+	//cout << "here: " << scientific << setprecision(10) <<  log(likOffHap[0]*exp(prior[0])+likOffHap[1]*exp(prior[1])) << " " << logLikelihood << endl;
+#endif
+	likelihoodComputed=true;
+}
+
+
+
+void ObservationModelFBMax::computeMAPState()
+{
+
+	// now backtrack
+	for (int b=bMid; b>0;b--) {
+//		cout << "mapState[" << b << "]: " << mapState[b] << " btf[]: " << btf[b][mapState[b]] << endl;
+		mapState[b-1]=btf[b][mapState[b]];
+	}
+
+	for (int b=bMid;b<readSize-1;b++) {
+		mapState[b+1]=btb[b][mapState[b]];
+//		cout << "mapState[" << b << "]: " << mapState[b] << " btf[]: " << btb[b][mapState[b]] << endl;
+
+	}
+
+	//cout << "mapState: "; for (int b=0;b<readSize;b++) cout << " " << mapState[b]; cout << endl;
+
+}
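+
+/*
+   Sketch of the Viterbi decoding implemented above (wording mine, the
+   mechanics follow the code): computeForwardMessages() runs max-product
+   passes from both read ends towards bMid, storing arg-max back-pointers in
+   btf[b] (b<=bMid) and btb[b] (b>=bMid); calcLikelihoodFromLastSlice() picks
+   the best state at bMid; the two loops above then recover the full MAP
+   state sequence by following the back-pointers outwards:
+
+       mapState[b-1] = btf[b][mapState[b]]    for b = bMid,...,1
+       mapState[b+1] = btb[b][mapState[b]]    for b = bMid,...,readSize-2
+*/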
+
+/*
+void ObservationModelFBMax::reportVariants(map<int, ReportVariant > & indels, map<int, ReportVariant > & snps, string & align)
+{
+	runHMM();
+
+	align=string(hapSize, 'R');
+	indels.clear();
+	snps.clear();
+
+	ml.firstBase=-1;
+	ml.lastBase=-1;
+
+	int b=0;
+	while (b<readSize) {
+		// only report variants for bases that are on the haplotype
+		int s=mapState[b];
+		if ( (s%numS)>0 && (s%numS)<=hapSize ) {
+			if (s>=numS) { // insertion
+				int pos=(s%numS)-1; // position of insertion wrt haplotype
+				int len=0; // length of insertion
+				int rpos=b; // start base of insertion in read
+				while (b<readSize && mapState[b]>=numS) {
+					b++;
+					len++;
+				}
+				indels[pos]=ReportVariant(len, read.seq.seq.substr(rpos, len), b-len);
+				indels[pos]=ReportVariant()
+			} else {
+				// update firstBase and lastBase
+				if (ml.firstBase==-1) ml.firstBase=s-1; else if (s-1<ml.firstBase) ml.firstBase=s-1;
+				if (ml.lastBase==-1) ml.lastBase=s-1; else if (s-1>ml.lastBase) ml.lastBase=s-1;
+
+
+				// check for SNP
+				if (read.seq[b]!=hap.seq[s-1]) {
+					string snp;
+					snp+=hap.seq[s-1];
+					snp.append("=>");
+					snp+=read.seq[b];
+					snps[s-1]=(ReportVariant(0,snp, b));
+					align[s-1]=read.seq[b];
+				}
+				// check for deletion
+				if (b<readSize-1) {
+					int ns=mapState[b+1];
+					if (ns<numS && ns-s>1) { // make sure next state is not an insertion..
+						int pos=s+1-1;
+						int len=-(ns-s-1);
+						indels[pos]=ReportVariant(len, hap.seq.substr(pos, -len), b);
+						for (int y=pos;y<-len+pos;y++) align[y]='D';
+					}
+				}
+
+			}
+
+		}
+
+		b++;
+	}
+
+
+}
+*/
+/*
+void getUniqueCoordinates(const Haplotype & hap, const Read & read, AlignedVariant & av)
+{
+	int rightFlankHap, leftFlankHap,rightFlankRead, leftFlankRead;
+
+	if (av.getType()==Variant::INS || av.getType() == Variant::DEL) {
+		const string & seq = av.getSeq();
+		int l = seq.size();
+
+		if (1) {
+
+
+			int p = av.getStartHap();
+			cout << "startHap: " << p << endl;
+			cout << "startRead: " << av.getStartRead() << endl;
+
+			while (p+l<=hap.seq.size()) {
+				string ss = hap.seq.substr(p,l);
+				if (ss!=seq) {
+
+					break;
+				}
+				p+=l;
+			}
+
+			rightFlankHap = p;
+
+			p = av.getStartHap()-l;
+
+			while (p>=0) {
+				string ss = hap.seq.substr(p,l);
+				if (ss!=seq) {
+					p+=l;
+					break;
+				}
+				p-=l;
+			}
+
+			leftFlankHap = p-1;
+			if (leftFlankHap<0) leftFlankHap = 0;
+
+			cout << "leftFlankHap: " << leftFlankHap << " rightFlankHap: " << rightFlankHap << endl;
+		}
+
+		// do read flanks
+
+		if (av.getType() == Variant::INS) {
+			int p = av.getStartRead();
+			cout << "startRead: " << p << endl;
+			while (p+l<=read.seq.size()) {
+				string ss = read.seq.seq.substr(p,l);
+				if (ss!=seq) {
+
+					break;
+				}
+				p+=l;
+			}
+
+			rightFlankRead = p;
+
+			p = av.getStartRead()-l;
+
+			while (p>=0) {
+				string ss = read.seq.seq.substr(p,l);
+				if (ss!=seq) {
+					p+=l;
+					break;
+				}
+				p-=l;
+			}
+
+			int leftFlankRead = p-1;
+			if (leftFlankRead<0) leftFlankRead = 0;
+
+			cout << "leftFlankRead: " << leftFlankRead << " rightFlankRead: " << rightFlankRead << endl;
+
+
+
+		} else if (av.getType() == Variant::DEL) {
+					int p = av.getStartRead()+1;
+					cout << "startRead: " << p << endl;
+					while (p+l<=read.seq.size()) {
+						string ss = read.seq.seq.substr(p,l);
+						if (ss!=seq) {
+
+							break;
+						}
+						p+=l;
+					}
+
+					rightFlankRead = p;
+
+					p = av.getStartRead()+1-l;
+
+					while (p>=0) {
+						string ss = read.seq.seq.substr(p,l);
+						if (ss!=seq) {
+							p+=l;
+							break;
+						}
+						p-=l;
+					}
+
+					int leftFlankRead = p-1;
+					if (leftFlankRead<0) leftFlankRead = 0;
+
+					cout << "leftFlankRead: " << leftFlankRead << " rightFlankRead: " << rightFlankRead << endl;
+
+
+
+				}
+
+
+	}
+
+
+
+}
+*/
+
+
+void ObservationModelFBMax::reportVariants()
+{
+	runHMM();
+
+	ml.align=string(hapSize, 'R');
+	ml.indels.clear();
+	ml.snps.clear();
+
+	ml.firstBase=-1;
+	ml.lastBase=-1;
+	ml.hapIndelCovered.clear();
+	ml.hapSNPCovered.clear();
+	ml.hpos.clear();
+	ml.hpos.resize(readSize);
+
+	ml.nBQT=0;
+	ml.nmmBQT=0;
+	ml.mLogBQ=0.0;
+	ml.nMMRight=0;
+	ml.nMMLeft=0;
+	ml.numIndels = 0;
+	ml.numMismatch = 0;
+
+	int b=0;
+	while (b<readSize) {
+		// only report variants for bases that are on the haplotype
+		int s=mapState[b];
+		if ( (s%numS)>0 && (s%numS)<=hapSize ) {
+			if (s>=numS) { // insertion
+				int pos=(s%numS)-1+1; // position of insertion wrt haplotype MAINTAIN CONVENTION OF INSERTION BEFORE BASE X
+				int len=0; // length of insertion
+				int rpos=b; // start base of insertion in read
+				while (b<readSize && mapState[b]>=numS) {
+					ml.hpos[b]=MLAlignment::INS;
+					b++;
+					len++;
+				}
+				int readStart=rpos;
+				int readEnd=b-1;
+				int hapStart=pos;
+				int hapEnd=pos;
+				string seq=read.seq.seq.substr(rpos,len);
+				ml.indels[pos]=AlignedVariant(string("+").append(seq),  hapStart, hapEnd, readStart, readEnd);
+				ml.numIndels++;
+				b--;
+				//getFlankingCoordinatesBetter(this->hap, this->read, ml.indels[pos]);
+
+			} else {
+				ml.hpos[b]=s-1;
+				// update firstBase and lastBase
+				if (ml.firstBase==-1) ml.firstBase=s-1; else if (s-1<ml.firstBase) ml.firstBase=s-1;
+				if (ml.lastBase==-1) ml.lastBase=s-1; else if (s-1>ml.lastBase) ml.lastBase=s-1;
+
+				if (read.qual[b]>params.checkBaseQualThreshold){
+					ml.nBQT++;
+					ml.mLogBQ+=log10(1.0-read.qual[b]);
+				}
+
+				// check for SNP
+				if (read.seq[b]!=hap.seq[s-1]) {
+					string snp;
+					snp+=hap.seq[s-1];
+					snp.append("=>");
+					snp+=read.seq[b];
+					int readStart=b;
+					int readEnd=b;
+					int hapStart=s-1;
+					int hapEnd=s-1;
+
+					if (read.qual[b]>params.checkBaseQualThreshold) {
+						ml.nmmBQT++;
+
+					}
+
+					if (b<6) ml.nMMLeft++;
+					if (b>readSize-6) ml.nMMRight++;
+
+
+					if (read.qual[b]>0.95) ml.numMismatch++;
+
+
+
+					ml.snps[s-1]=AlignedVariant(snp,hapStart, hapEnd, readStart, readEnd);
+					ml.align[s-1]=read.seq[b];
+				}
+				// check for deletion
+				if (b<readSize-1) {
+					int ns=mapState[b+1];
+					if (ns<numS && ns-s>1) { // make sure next state is not an insertion..
+						int pos=s+1-1;
+						int len=-(ns-s-1);
+						//indels[pos]=ReportVariant(len, hap.seq.substr(pos, -len), b);
+
+						for (int y=pos;y<-len+pos;y++) ml.align[y]='D';
+						int readStart=b;
+						int readEnd=b+1;
+						int hapStart=pos;
+						int hapEnd=pos-len-1;
+						string seq=hap.seq.substr(pos,-len);
+						ml.indels[pos]=AlignedVariant(string("-").append(seq), hapStart, hapEnd, readStart, readEnd);
+						//getFlankingCoordinatesBetter(this->hap, this->read, ml.indels[pos]);
+						ml.numIndels++;
+					}
+				}
+
+			}
+
+		} else {// on haplotype
+			if (s%numS==0) ml.hpos[b]=MLAlignment::LO; else ml.hpos[b]=MLAlignment::RO;
+
+		}
+		b++;
+	}
+
+	for (map<int,AlignedVariant>::const_iterator it=hap.indels.begin();it!=hap.indels.end();it++) {
+		const AlignedVariant & av=it->second;
+		if (av.isCovered(params.padCover, ml.firstBase, ml.lastBase)) ml.hapIndelCovered[it->first]=true; else ml.hapIndelCovered[it->first]=false;
+	}
+	for (map<int,AlignedVariant>::const_iterator it=hap.snps.begin();it!=hap.snps.end();it++) {
+		const AlignedVariant & av=it->second;
+		if (av.isCovered(params.padCover, ml.firstBase, ml.lastBase)) ml.hapSNPCovered[it->first]=true; else ml.hapSNPCovered[it->first]=false;
+	}
+
+
+}
+
+void ObservationModelFBMax::printAlignment(size_t hapScrPos)
+{
+	// count how many bases in the read are left of the haplotype
+	calcLikelihood();
+
+	/*
+	for (int b=0;b<readSize;b++) {
+		int s=mapState[b];
+		//cout << "[" << b << " " << read.seq.seq[b] << " " << hap.seq[(s%numS)-1] << " " << s << "]";
+
+	}
+	*/
+
+	string leftHap, rightHap;
+	string rhap(hap.size(),' ');
+	string ins;
+
+	bool insact=false;
+	int b=0;
+	while (b<readSize) {
+		// only report variants for bases that are on the haplotype
+		int s=mapState[b];
+		char nuc=read.seq.seq[b];
+		if (s%numS==0) {
+			//
+			leftHap+=nuc;
+		} else if ( (s%numS)>0 && (s%numS)<=hapSize ) {
+			if (s>=numS) { // insertion
+				if (!insact) {
+					insact=true;
+					ins+='[';
+					stringstream os; os << (s%numS);
+					ins.append(os.str());
+					ins+=' ';
+				}
+
+				ins+=nuc;
+
+			} else {
+				if (insact) ins+=']';
+				insact=false;
+				rhap[s-1]=nuc;
+
+				if (b<readSize-1) {
+					int ns=mapState[b+1];
+					if (ns<numS && ns-s>1) {
+						int len=ns-s-1;
+						rhap.replace(s, len, string(len,'_'));
+					}
+
+
+				}
+
+
+			}
+
+		} else {
+			rightHap+=nuc;
+		}
+		b++;
+	}
+	if (insact) ins+=']';
+
+	stringstream os;
+	os << readSize << " " << ml.offHap << " " << ml.indels.size() << " " << ml.firstBase << " " << ml.lastBase << " " << logLikelihood << " ";
+	for (map<int,AlignedVariant>::const_iterator it=hap.indels.begin();it!=hap.indels.end();it++) {
+		if  (ml.hapIndelCovered[it->first]) os << "1 "; else os << "0 ";
+	}
+	string prefix=os.str();
+
+	int leftHapSpace=int(hapScrPos)-int(prefix.size());
+	if (leftHapSpace<0) leftHapSpace=0;
+
+	string prLeftHap=string(leftHapSpace,' ');
+
+	if (int(leftHap.size())>leftHapSpace) {
+		prLeftHap=leftHap.substr(leftHap.size()-leftHapSpace, leftHapSpace);
+	} else if (leftHap.size()>0) {
+		prLeftHap.replace(leftHapSpace-leftHap.size(), leftHap.size(), leftHap);
+	}
+
+	cout << prefix<<prLeftHap<<rhap<<rightHap << " " << ins << " read: " << read.seq.seq << endl;
+
+
+	for (map<int,AlignedVariant>::const_iterator it=hap.indels.begin();it!=hap.indels.end();it++) {
+		cout << " " << it->first;
+	}
+	cout << endl;
+}
+
+
+
+void ObservationModelFBMax::computeForwardMessages()
+{
+	if (forwardDone) return;
+
+	for (int b=1;b<=bMid;b++) {
+		passMessageTwoDec(alpha[b], alpha[b-1], obs[b-1], btf[b]);
+	}
+	for (int b=readSize-1;b>bMid;b--) {
+	     passMessageTwoInc(beta[b-1], beta[b], obs[b], btb[b-1]);
+	}
+
+	forwardDone=true;
+}
+
+void ObservationModelFBMax::computeBackwardMessages()
+{
+	// no backward messages for this model
+	backwardDone=true;
+}
+
+void ObservationModelFBMax::allocateMemory()
+{
+	if (memAllocated) return; //throw string("Memory already allocated.");
+	mapState.resize(readSize, 0);
+
+
+	obs.reserve(readSize);
+	alpha.reserve(readSize);
+	beta.reserve(readSize);
+	btf.reserve(readSize);
+	btb.reserve(readSize);
+
+	for (int b=0;b<readSize;b++) {
+		obs.push_back(new double[numS*2]);
+		alpha.push_back(new double[numS*2]);
+		beta.push_back(new double[numS*2]);
+		if (b<=bMid) {
+			btf.push_back(new int[numS*2]);
+		} else btf.push_back(NULL);
+		if (b>=bMid) {
+			btb.push_back(new int[numS*2]);
+		} else btb.push_back(NULL);
+	}
+	if (makeObsVector) { obsVector=new double[4*readSize]; };
+	memAllocated=true;
+}
+
+void ObservationModelFBMax::deleteMemory()
+{
+	if (memAllocated) {
+		for (int b=0;b<readSize;b++) {
+			delete[] obs[b];
+			delete[] alpha[b];
+			delete[] beta[b];
+			if (btf[b]!=NULL) delete[] btf[b];
+			if (btb[b]!=NULL) delete[] btb[b];
+		}
+		if (makeObsVector) delete[] obsVector;
+		memAllocated=false;
+	}
+}
+
+ObservationModelFBMaxErr::ObservationModelFBMaxErr(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & _params)
+{
+
+	read=r;
+	params=_params;
+	Init(_hap, hapStart);
+}
+
+
+
+void ObservationModelFBMaxErr::setupTransitionProbs()
+{
+	logpLOgLO=log(1.0-params.pFirstgLO);
+	logpFirstgLO=log(params.pFirstgLO);
+
+	numT=params.maxLengthDel+2;
+	logPTrans.resize(numT);
+	// maxT is the transition which corresponds to a normal-operation base extension
+	logPTrans[1]=log(1.0-params.pError);
+	double norm=0.0;
+	for (int x=1;x<numT;x++) if (x!=1) {
+		double p=-fabs(1.0-double(x));
+		logPTrans[x]=p;
+		norm+=exp(p);
+	}
+	norm=log(norm/params.pError);
+	for (int x=1;x<numT;x++) if (x!=1) logPTrans[x]-=norm;
+
+	// check norm
+	norm=0.0;
+	for (int x=1;x<numT;x++) norm+=exp(logPTrans[x]);
+	assert(fabs(norm-1.0)<1e-15);
+
+	logpInsgIns=-.5 ;
+	logpNoInsgIns=log(1.0-exp(logpInsgIns));
+	logpInsgNoIns=log(params.pError);
+	logpNoInsgNoIns=log(1-params.pError);
+	/*
+	cout << "logpInsgIns: " << logpInsgIns << endl;
+	cout << "logpNoInsgIns: " << logpNoInsgIns << endl;
+	cout << "logpInsgNoIns: " << logpInsgNoIns << endl;
+	cout << "logpNoInsgNoIns: " << logpNoInsgNoIns << endl;
+	*/
+
+	// determine base-specific error probabilities
+	ReadIndelErrorModel riem;
+
+	logProbError = vector<double>(hapSize+2,log(1e-5));
+	logProbNoError = vector<double>(hapSize+2,log(1-1e-5));
+
+
+	int len=1;
+	double perr=riem.getViterbiHPError(1);
+	logProbError[1]=log(perr);
+	logProbNoError[1]=log(1.0-perr);
+
+	// NOTE X = ( LO, 0, 1, 2, 3, .. )
+	for (int b=1;b<hapSize;b++) {
+		if (hap.seq[b]==hap.seq[b-1]) {
+			len++;
+		} else {
+			perr=riem.getViterbiHPError(len);
+//			cout << "len: " << len << " perr: " << perr << endl;
+			logProbError[b]=log(perr);
+			logProbNoError[b]=log(1.0-perr);
+			len=1;
+		}
+		//cout << "hap[" << b << "]: " << len << " " << logProbError[b+1]  << endl;
+	}
+	perr=riem.getViterbiHPError(len);
+	// cout << "len: " << len << " perr: " << perr << endl;
+	logProbError[hapSize-1]=log(perr);
+	logProbNoError[hapSize-1]=log(1.0-perr);
+
+
+	/*
+	cout << "logProbError: " << endl;
+	for (int x=0;x<=hapSize+1;x++) {
+		cout << "x: " << x << " " << ((x>0&&x<=hapSize)?hap.seq[x-1]:'N') << " " << logProbError[x]  << " " << logProbNoError[x] <<endl;
+	}
+	*/
+
+}
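+
+/*
+   The loop above replaces the single pError of the base model with
+   position-specific values: logProbError[x]/logProbNoError[x] are filled
+   from ReadIndelErrorModel::getViterbiHPError(len), where len is (roughly)
+   the length of the homopolymer run the haplotype base belongs to --
+   presumably so that longer runs, e.g. the C-run in "ACCCCG", get a higher
+   indel error rate.  These per-position values are what the
+   passMessageTwoInc/Dec overrides below use in place of the global
+   logpInsgNoIns/logpNoInsgNoIns for most transitions.
+*/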
+
+void ObservationModelFBMaxErr::passMessageTwoInc(double *beta_l, const double *beta_l_1,const double *obs_l_1, int *bt_l)
+{           //                                       b-1                           b
+
+	// x^l, i^l=0 => x^{l+1}, i^{l+1}=0
+	//beta_l[0]=( exp(obs_l_1[0]+beta_l_1[0]+logpLOgLO+logpNoInsgNoIns) ) + ( exp(obs_l_1[1]+beta_l_1[1]+logpFirstgLO+logpNoInsgNoIns ) );
+	beta_l[0]=-HUGE_VAL;
+	updateMax(beta_l[0], bt_l[0], obs_l_1[0]+beta_l_1[0]+logpLOgLO+logpNoInsgNoIns, 0);
+	updateMax(beta_l[0], bt_l[0], obs_l_1[1]+beta_l_1[1]+logpFirstgLO+logpNoInsgNoIns, 1);
+
+	for (int x=1;x<=hapSize;x++ ) {
+		// double tmp=beta_l_1[x]+logpNoInsgNoIns;
+		beta_l[x]=-HUGE_VAL;
+		for (int y=1;y<numT;y++) {
+			int newx=x+y;
+			if (newx>hapSize) newx=ROState;
+			double lpn=logProbNoError[newx];
+			double lpt=logProbError[newx];
+			double lp=(y==1)?lpn:(lpt+double(y-1)*logpInsgIns);
+
+			//beta_l[x]+=exp(logPTrans[y]+logpNoInsgNoIns+beta_l_1[newx]+obs_l_1[newx]);
+			updateMax(beta_l[x], bt_l[x], lp+lpn+beta_l_1[newx]+obs_l_1[newx], newx);
+		}
+	}
+
+	// RO -> RO pROgRO=1.0;
+	//beta_l[ROState]=exp(obs_l_1[ROState]+beta_l_1[ROState]+logpNoInsgNoIns);
+	beta_l[ROState]=-HUGE_VAL;
+	updateMax(beta_l[ROState], bt_l[ROState],obs_l_1[ROState]+beta_l_1[ROState]+logProbNoError[ROState], ROState);
+
+	//
+	// x^l, i^l=0 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize;x++ ) {
+		//beta_l[x]+=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgNoIns);
+		updateMax(beta_l[x], bt_l[x], obs_l_1[numS+x]+beta_l_1[numS+x]+logProbError[x+1], numS+x);
+	}
+	int x=hapSize+1; updateMax(beta_l[x], bt_l[x], obs_l_1[numS+x]+beta_l_1[numS+x], numS+x);
+
+
+	// x^l, i^l=1 = > x^{l+1}=x^l, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+		//beta_l[numS+x]=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns);
+		beta_l[numS+x]=obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns;
+		bt_l[numS+x]=numS+x;
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l+1, i^{l+1}=0
+	//beta_l[0+numS]+=exp(obs_l_1[0]+beta_l_1[0]+logpNoInsgIns); // cannot go from insertion on to the haplotype
+	updateMax(beta_l[0+numS], bt_l[0+numS], obs_l_1[0]+beta_l_1[0]+logpNoInsgIns, 0);
+	for (int x=1;x<=hapSize+1;x++ ) {
+		int newx=x+1; if (newx>ROState) newx=ROState;
+		//beta_l[numS+x]+=exp(obs_l_1[newx]+beta_l_1[newx]+logpNoInsgIns);
+		updateMax(beta_l[numS+x], bt_l[numS+x], obs_l_1[newx]+beta_l_1[newx]+logpNoInsgIns, newx);
+	}
+
+
+	// convert back to log
+
+	// for (int x=0;x<2*numS;x++) beta_l[x]=log(beta_l[x]);
+}
+
+void ObservationModelFBMaxErr::passMessageTwoDec(double *beta_l, const double *beta_l_1,const double *obs_l_1, int *bt_l)
+{ // 														b                 b-1
+	// P(x^l=x^{l-1}+d|x^{l-1})=(10^{logpSkip*d})
+
+	// x^l, i^l=0 = > x^{l+1}, i^{l+1}=0
+	beta_l[ROState]=-HUGE_VAL;
+	updateMax(beta_l[ROState], bt_l[ROState], obs_l_1[ROState]+beta_l_1[ROState]+logpLOgLO+logpNoInsgNoIns, ROState);
+	updateMax(beta_l[ROState], bt_l[ROState], obs_l_1[hapSize]+beta_l_1[hapSize]+logpFirstgLO+logpNoInsgNoIns, hapSize);
+
+	for (int x=1;x<=hapSize;x++ ) {
+		beta_l[x]=-HUGE_VAL;
+		double lpt = logProbError[x];
+		double lpn = logProbNoError[x];
+		for (int y=1;y<numT;y++) {
+			int newx=x-y;
+			if (newx<0) newx=0;
+			double lp=(y==1)?lpn:(lpt+double(y-1)*logpInsgIns);
+			//beta_l[x]+=exp(obs_l_1[newx]+logPTrans[y]+beta_l_1[newx]+logpNoInsgNoIns);
+			updateMax(beta_l[x], bt_l[x], obs_l_1[newx]+lp+beta_l_1[newx]+lpn, newx);
+		}
+	}
+	// RO -> RO pROgRO=1.0;
+	//beta_l[0]=exp(obs_l_1[0]+beta_l_1[0]+logpNoInsgNoIns);
+	beta_l[0]=obs_l_1[0]+beta_l_1[0]+logpNoInsgNoIns;
+	bt_l[0]=0;
+
+
+	// x^l, i^l=0 = > x^{l+1}=x^l-1, i^{l+1}=1
+	//beta_l[ROState]+=(exp(obs_l_1[numS+ROState]+beta_l_1[numS+ROState]+logpLOgLO+logpInsgNoIns)+exp(obs_l_1[numS+hapSize]+beta_l_1[numS+hapSize]+logpFirstgLO+logpInsgNoIns)); // cannot go from insertion on to the haplotype
+	updateMax(beta_l[ROState], bt_l[ROState],obs_l_1[numS+ROState]+beta_l_1[numS+ROState]+logpLOgLO+logProbError[ROState], numS+ROState);
+	updateMax(beta_l[ROState], bt_l[ROState],obs_l_1[numS+hapSize]+beta_l_1[numS+hapSize]+logpFirstgLO+logProbError[hapSize], numS+hapSize);
+
+	for (int x=1;x<=hapSize;x++) {
+		int newx=x-1; if (newx<0) newx=0;
+		//beta_l[x]+=exp(obs_l_1[numS+newx]+beta_l_1[numS+newx]+logpInsgNoIns);
+		updateMax(beta_l[x], bt_l[x],obs_l_1[numS+newx]+beta_l_1[numS+newx]+logProbError[x], numS+newx);
+	}
+
+
+
+	// x^l, i^l=1 = > x^{l+1}, i^{l+1}=1
+	for (int x=0;x<=hapSize+1;x++ ) {
+	//	beta_l[numS+x]=exp(obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns);
+		beta_l[numS+x]=obs_l_1[numS+x]+beta_l_1[numS+x]+logpInsgIns;
+		bt_l[numS+x]=numS+x;
+	}
+
+	// x^l, i^l=1 = > x^{l+1}=x^l, i^{l+1}=0
+	for (int x=1;x<=hapSize+1;x++ ) {
+	//	beta_l[numS+x]+=exp(obs_l_1[x]+beta_l_1[x]+logpNoInsgIns);
+		updateMax(beta_l[numS+x], bt_l[numS+x],obs_l_1[x]+beta_l_1[x]+logpNoInsgIns, x);
+	}
+	// convert back to log
+	//for (int x=0;x<2*numS;x++) beta_l[x]=log(beta_l[x]);
+}
diff --git a/ObservationModelFB.hpp b/ObservationModelFB.hpp
new file mode 100644
index 0000000..d995cc5
--- /dev/null
+++ b/ObservationModelFB.hpp
@@ -0,0 +1,169 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef OBSERVATIONMODELFB_HPP_
+#define OBSERVATIONMODELFB_HPP_
+#include <vector>
+#include "Haplotype.hpp"
+#include "Read.hpp"
+#include "MLAlignment.hpp"
+#include "ObservationModel.hpp"
+using namespace std;
+
+const double LOGTINY=-100.0;
+const double EPS=1e-10;
+// simple HMM inference algorithm for the observation of a single read given a true underlying haplotype
+class ObservationModelFB
+{
+public:
+	ObservationModelFB() {};
+	virtual ~ObservationModelFB();
+	MLAlignment calcLikelihood();
+	double getLogLikelihood() { calcLikelihood(); return ml.ll; };
+	double* getMarginal(int readBase) { assert(readBase<readSize); return mar[readBase]; };
+	void getObsVector(int b, double *vec) const;
+	/*!
+	 * @abstract Change haplotype used for likelihood computation, which
+	 * can be useful in the EM algorithm
+	 * @param newHap new haplotype
+	 * @discussion newHap must have same length as previous haplotype
+	 */
+	void changeHaplotype(const Haplotype & newHap);
+	void printMarginals();
+	void computeMarginals();
+	void computeXMarginals();
+	void printAlignment(size_t hapScrPos);
+	void printStatistics();
+	vector<double> getOffHapLik() const { return likOffHap; };
+	const MLAlignment &  getMLAlignment() const { return ml; }
+
+protected:
+	virtual void Init(const Haplotype & _hap, uint32_t hapStart);
+	void forceOnHap();
+	void setupReadObservationPotentials();
+	virtual void setupTransitionProbs();
+	virtual void initHMM();
+	//void runHMM();
+	virtual void allocateMemory();
+	virtual void deleteMemory();
+	virtual void computeBMidPrior(vector<double> & _prior, double mapQual);
+
+	void passMessageOneInc(double *alpha_l, const double *alpha_l_1,  const double *obs_l_1);
+	void passMessageOneDec(double *alpha_l, const double *alpha_l_1, const double *obs_l_1);
+	virtual void passMessageTwoInc(double *beta_l, const double *beta_l_1,const double *obs_l_1);
+	virtual void passMessageTwoDec(double *beta_l, const double *beta_l_1,const double *obs_l_1);
+
+	virtual void computeForwardMessages();
+	void computeBackwardMessages();
+	virtual void calcLikelihoodFromLastSlice();
+	void printMarginalsInt(const vector<double*> & pot);
+	bool hasErrors();
+	bool _badValue(double v);
+
+	// maximum number of bases that may be skipped by the sequencer
+	// exponential decay rate of skip probability
+
+
+	double logLikelihood, logLikelihoodNoPrior;
+	MLAlignment ml;
+
+	Haplotype hap;
+	Read read;
+	vector<double> prior;
+	vector<double> logPTrans;
+	vector<double> likOffHap;
+	vector<double> priorOffHap;
+
+
+
+	bool obsInitialized, memAllocated, HMMInitialized, HMMConsistent, likelihoodComputed, forwardDone, backwardDone, marginalsComputed;
+
+	bool makeObsVector;
+
+	// potentials are stored as log-values
+	// observation potentials
+	vector<double*> obs;
+
+	// posterior marginals given _hap
+	vector<double*> mar, xmar;
+	// forward and backward messages
+	vector<double*> alpha, beta;
+	double *obsVector;
+
+	vector<double> logProbError, logProbNoError;
+
+	// structure of read-base variable
+
+	// {LeftOfHaplotype, Hap_1, Hap_2, Hap_3, ..., Hap_length, RightOfHaplotype}
+
+	// HMM internal variables
+
+	int hapSize, readSize, ROState, bMid, hapStart, numT, numS;
+
+	double logpLOgLO, logpFirstgLO;
+	double logpInsgIns, logpInsgNoIns, logpNoInsgNoIns, logpNoInsgIns;
+
+public:
+
+	ObservationModelParameters params;
+	ObservationModelFB(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & params);
+
+
+};
+
+class ObservationModelFBMax : public ObservationModelFB
+{
+public:
+	void printMarginals() { throw string("Not possible for this model"); }
+	void computeMarginals() { throw string("Not possible for this model"); }
+	void computeXMarginals() { throw string("Not possible for this model"); }
+	void reportVariants();
+	void printAlignment(size_t hapScrPos);
+	MLAlignment calcLikelihood();
+	ObservationModelFBMax(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & params);
+	ObservationModelFBMax() {};
+
+	~ObservationModelFBMax() { deleteMemory(); };
+	vector<int> getMapState() { runHMM(); return mapState; };
+protected:
+	void passMessageOneInc(double *alpha_l, const double *alpha_l_1,  const double *obs_l_1);
+	void passMessageOneDec(double *alpha_l, const double *alpha_l_1, const double *obs_l_1);
+	virtual void passMessageTwoInc(double *beta_l, const double *beta_l_1,const double *obs_l_1, int * bt_l);
+	virtual void passMessageTwoDec(double *beta_l, const double *beta_l_1,const double *obs_l_1, int * bt_l);
+	void allocateMemory();
+	void deleteMemory();
+	void runHMM();
+	void computeForwardMessages();
+	void computeBackwardMessages();
+	void calcLikelihoodFromLastSlice();
+	void computeMAPState();
+	inline void updateMax(double & destValue, int & destIdx, const double newValue, const int newIdx);
+	vector<int *> btf, btb;
+	vector<int> mapState; // MAP state for HMM
+	//static double EPS=1e-10;
+};
+
+class ObservationModelFBMaxErr : public ObservationModelFBMax
+{
+public:
+	ObservationModelFBMaxErr(const Haplotype & _hap, const Read & r, uint32_t hapStart, const ObservationModelParameters & params);
+
+protected:
+	void setupTransitionProbs();
+	void passMessageTwoInc(double *beta_l, const double *beta_l_1,const double *obs_l_1, int * bt_l);
+	void passMessageTwoDec(double *beta_l, const double *beta_l_1,const double *obs_l_1, int * bt_l);
+};
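+
+/*
+   Usage sketch (not compiled in, in the spirit of the commented-out example
+   in ObservationModelSeqAn.hpp).  How hap, read, hapStart and params are
+   obtained is left out here; they are supplied by the caller.
+
+   ObservationModelFBMaxErr om(hap, read, hapStart, params);
+   MLAlignment ml = om.calcLikelihood();   // runs the max-product HMM and reports variants
+   cout << "log-lik: " << ml.ll << " offHap: " << ml.offHap << endl;
+   om.printAlignment(40);                  // 40 = screen column for the haplotype, arbitrary
+*/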
+
+
+#endif /* OBSERVATIONMODELFB_HPP_ */
diff --git a/ObservationModelSeqAn.hpp b/ObservationModelSeqAn.hpp
new file mode 100644
index 0000000..99a1a2a
--- /dev/null
+++ b/ObservationModelSeqAn.hpp
@@ -0,0 +1,377 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * ObservationModelSeqAn.hpp
+ *
+ *  Created on: Sep 4, 2009
+ *      Author: caa
+ */
+
+#ifndef OBSERVATIONMODELSEQAN_HPP_
+#define OBSERVATIONMODELSEQAN_HPP_
+
+#include <seqan/align.h>
+#include <seqan/align/align_base.h>
+#include <seqan/graph_align.h>
+#include "Haplotype.hpp"
+#include "Read.hpp"
+#include "ObservationModel.hpp"
+#include "MLAlignment.hpp"
+using namespace seqan;
+using namespace std;
+
+const int DEBUGCONVERTALIGNMENT = 0;
+
+class Realign {
+public:
+	void getFlankingCoordinatesBetter(const Haplotype & hap, const Read & read, AlignedVariant & av)
+	{
+		int rightFlankHap, leftFlankHap,rightFlankRead, leftFlankRead;
+		//cout << "variant: " << av.getString() << endl;
+		//cout << "startHap: " << av.getStartHap() << endl;
+		//cout << "startRead: " << av.getStartRead() << endl;
+
+
+		if (av.getType()==Variant::DEL) {
+			const string & seq = av.getSeq();
+			int l = seq.size();
+
+			string origSeq = hap.seq;
+
+			int sh = av.getStartHap();
+			origSeq.erase(sh, l);
+			leftFlankHap = sh-1;
+			rightFlankHap = sh+l;
+			int newpos = sh;
+			for (int x=sh-1;x>0;x--) {
+				string newseq = hap.seq;
+				newseq.erase(x, l);
+				if (newseq == origSeq) {
+					leftFlankHap = x-1;
+					newpos = x;
+				}
+			}
+			if (leftFlankHap<=0) leftFlankHap = 0;
+			for (int x=sh+1;x<int(hap.seq.size()-l);x++) {
+				string newseq = hap.seq;
+				newseq.erase(x, l);
+				if (newseq == origSeq) {
+					rightFlankHap = x+l;
+					newpos = x;
+				}
+			}
+
+			leftFlankRead = av.getStartRead()- (sh-leftFlankHap)+1; if (leftFlankRead<0) leftFlankRead = 0;
+			rightFlankRead = av.getStartRead()+1 + (rightFlankHap-sh-l); if (rightFlankRead>=int(read.seq.size())) rightFlankRead = read.seq.size()-1;
+
+
+
+			//cout << "leftFlankHap: " << leftFlankHap << " rightFlankHap: " << rightFlankHap << endl;
+			//cout << "leftFlankRead: " << leftFlankRead << " rightFlankRead: " << rightFlankRead << endl;
+		} else if (av.getType()==Variant::INS) {
+			const string & seq = av.getSeq();
+			int l = seq.size();
+			string newiseq;
+			string origSeq = hap.seq;
+
+			int sh = av.getStartHap();
+			origSeq.insert(sh, seq);
+			leftFlankHap = sh-1;
+			rightFlankHap = sh;
+			int newpos = sh;
+			for (int x=sh-1;x>0;x--) {
+				string newseq = hap.seq;
+				string iseq = origSeq.substr(x, l);
+				newseq.insert(x, iseq);
+				int eq = 0;
+				if (newseq == origSeq) {
+					leftFlankHap = x-1;
+					eq = 1;
+					newpos = x;
+					newiseq = iseq;
+				}
+				//cout << "x: " << x << " iseq: " << iseq << " eq: " << eq << endl;
+
+			}
+			if (leftFlankHap<=0) leftFlankHap = 0;
+			for (int x=sh+1;x<int(hap.seq.size()-l);x++) {
+				string newseq = hap.seq;
+				string iseq = origSeq.substr(x, l);
+				newseq.insert(x, iseq);
+				int eq=0;
+				if (newseq == origSeq) {
+					rightFlankHap = x;
+					eq=1;
+					newpos = x;
+				}
+				//cout << "x: " << x << " iseq: " << iseq << " eq: " << eq << endl;
+
+			}
+
+			leftFlankRead = av.getStartRead()- (sh-leftFlankHap)+1; if (leftFlankRead<0) leftFlankRead = 0;
+			rightFlankRead = av.getStartRead()+l + (rightFlankHap-sh); if (rightFlankRead>=int(read.seq.size())) rightFlankRead = read.seq.size()-1;
+
+
+
+			//cout << "leftFlankHap: " << leftFlankHap << " rightFlankHap: " << rightFlankHap << endl;
+			//cout << "leftFlankRead: " << leftFlankRead << " rightFlankRead: " << rightFlankRead << endl;
+			//cout << "newiseq: " << newiseq << endl;
+		} else {
+			leftFlankRead = av.getStartRead()-1; if (leftFlankRead<0) leftFlankRead = 0;
+			rightFlankRead = av.getStartRead()+1; if (rightFlankRead>=int(read.seq.size())) rightFlankRead = read.seq.size()-1;
+			leftFlankHap = av.getStartHap()-1; if (leftFlankHap<0) leftFlankHap = 0;
+			rightFlankHap = av.getStartHap()+1; if (rightFlankHap>=int(hap.seq.size())) rightFlankHap = hap.seq.size()-1;
+		}
+		av.setFlanking(leftFlankHap, rightFlankHap, leftFlankRead, rightFlankRead);
+	}
+};
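+
+/*
+   Background for getFlankingCoordinatesBetter() above (illustration only):
+   an indel inside a repeat has several equivalent placements, e.g. deleting
+   one 'A' from CAAAT yields CAAT no matter which 'A' is removed.  The method
+   therefore slides the event left and right, keeps every shift that
+   reproduces the same edited sequence, and records the outermost haplotype
+   and read positions as flanks via av.setFlanking().
+*/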
+
+
+template <typename TSource, typename TSpec>
+inline void
+convertAlignment(
+          Align<TSource, TSpec> const & source, MLAlignment & ml, int hlen, int rlen, const Haplotype & hap, const Read & read)
+{
+        typedef Align<TSource, TSpec> const TAlign;
+        typedef typename Row<TAlign>::Type TRow;
+        typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
+        typedef typename Position<TAlign>::Type TPosition;
+
+        TPosition begin_ = beginPosition(cols(source));
+        TPosition end_ = endPosition(cols(source));
+
+	Realign realign;
+
+	if (DEBUGCONVERTALIGNMENT) cout << "begin_ " << begin_ << "  end_ " << end_ << endl;
+
+
+        ml.relPos = 0;
+        bool fbfound=false;
+
+        TRow& row_ = row(source, 0);
+        typedef typename Iterator<typename Row<TAlign>::Type const, Standard>::Type TIter;
+        TIter begin1_ = iter(row_, begin_);
+        TIter end1_ = iter(row_, end_);
+
+
+	ml.align=string(hlen,'R');
+	ml.hpos=vector<int>(rlen,MLAlignment::LO);
+
+
+	int b=0;
+	int hs=0; // relative start of haplo
+
+	int rb=0;
+
+	while (isGap(row(source,0), begin_+b)) {
+		ml.relPos--;
+		if (!isGap(row(source,1),begin_+b)) {
+			ml.hpos[rb]=MLAlignment::LO;
+			rb++;
+		}
+		++b;
+	}
+	hs=b;
+
+	if (DEBUGCONVERTALIGNMENT) cout << "relpos: " << ml.relPos << " hs: " << hs << endl;
+
+	int hb=0; // number of haplotype bases
+
+	while (begin_+b<end_ && rb<rlen) {
+	        if (DEBUGCONVERTALIGNMENT)  cout << "b: " << b << " hb: " <<  hb << endl;
+                if (isGap(row(source,0), begin_+b)) {
+                        if (hb<hlen) {
+				// insertion
+				string seq("+");
+				TIter it=iter(row(source,1), begin_+b);
+				while (isGap(row(source,0),begin_+b) && begin_+b<end_) {
+					seq+=convert<char>(*it);
+					ml.hpos[rb]=MLAlignment::INS;
+					++b;
+					++it;
+					++rb;
+				}
+				if (DEBUGCONVERTALIGNMENT) cout << "insertion: " << hb << " seq: " << seq << " readpos: " << rb-1 << " - " << rb-seq.size()+1 << endl;
+				AlignedVariant av(seq, hb,hb, rb-seq.size()+1, rb-1);
+				realign.getFlankingCoordinatesBetter(hap, read, av);
+				ml.indels[hb]=av;
+			} else {
+				ml.hpos[rb]=MLAlignment::RO;
+				++rb;
+				++b;
+			}
+
+                } else {
+                        if (!isGap( row(source,1), begin_+b)) {
+                                if (!fbfound) {
+                                        fbfound=true;
+					ml.firstBase=hb;
+                                }
+                                if (row(source,1)[begin_+b]!=row(source,0)[begin_+b]) {
+                                        // SNP
+					string snp("X=>X");
+					snp[0]=convert<char>(row(source,0)[begin_+b]);
+					snp[3]=convert<char>(row(source,1)[begin_+b]);
+
+					if (DEBUGCONVERTALIGNMENT) cout << "SNP: " << hb << " " << convert<char>(row(source,0)[begin_+b]) << "=>" << convert<char>(row(source,1)[begin_+b]) << endl;
+					ml.snps[hb]=AlignedVariant(snp, hb, hb, rb,rb);
+					realign.getFlankingCoordinatesBetter(hap, read, ml.snps[hb]);
+					ml.align[hb]=snp[3];
+                                }
+				ml.hpos[rb]=hb;
+				++rb;
+				++b;
+				++hb;
+                        } else {
+                                // deletion
+				string seq("-");
+				TIter it=iter(row(source,0), begin_+b);
+				int len=0;
+				while (isGap(row(source,1),begin_+b) && begin_+b<end_) {
+					seq+=convert<char>(*it);
+					ml.align[hb]='D';
+					++b;
+					++it;
+					++hb;
+					++len;
+				}
+				if (fbfound) {
+					ml.indels[hb-len]=AlignedVariant(seq, hb-len,hb-1, rb-1,rb);
+					realign.getFlankingCoordinatesBetter(hap, read, ml.indels[hb-len]);
+					if (DEBUGCONVERTALIGNMENT) cout << "deletion: " << hb-len << " - " << hb-1 <<  " seq: " << seq << " readpos: " << rb-1 << " - " << rb << endl;
+
+				}
+
+
+                        }
+
+        	}
+	}
+	ml.lastBase=hb;
+
+	if (DEBUGCONVERTALIGNMENT) {
+		cout << "mfb: " << ml.firstBase << " " << ml.lastBase << endl;
+		for (int r=0;r<rlen;r++) cout << "[" << r << "," << ml.hpos[r] << "]"; cout << endl;
+	}
+
+}
+
+/*
+int main()
+{
+
+
+
+	// check
+	// seqan::DnaString     _refSeq("ATGGCGTGACTGATCCTATCCCCGTT");
+	// seqan::DnaString _hapSeq("TTATATGGCGTG");
+
+	//seqan::DnaString _refSeq("ATGGCGTGACTGATCCTATCGTCGTT");
+	//seqan::DnaString   _hapSeq("CCCGGTGACTCC");
+
+	seqan::DnaString _refSeq("ATGGCGTGACTGATCCTATCGTCGTT");
+	seqan::DnaString           _hapSeq("CTATCGTCTGTAGGTGTCCT");
+
+
+	seqan::Score<int> score(-1, -460, -100,-960);
+
+	seqan::Align<seqan::DnaString, seqan::ArrayGaps> align;
+	seqan::resize(seqan::rows(align), 2);
+	seqan::assignSource(seqan::row(align, 0), _refSeq);
+	seqan::assignSource(seqan::row(align, 1), _hapSeq);
+	cout << "Score = " << seqan::globalAlignment(align, score) << endl;
+	cout << align << endl;
+
+	MLAlignment ml;
+
+	convertAlignment(align,ml, length(_refSeq),length(_hapSeq));
+
+}
+*/
+
+class ObservationModelSeqAn
+{
+public:
+	ObservationModelSeqAn(const Haplotype & _hap, const Read & r, uint32_t _hapStart, const ObservationModelParameters & _params, const seqan::Score<int> & _score)
+	{
+		score =_score;
+		hap_ptr = &_hap;
+		read_ptr = &r;
+		hapStart=_hapStart;
+		params=_params;
+		aligned=false;
+
+		// cout << "hap.seq: " << _hap.seq << endl;
+		// cout << "read: " << r.seq.seq << endl;
+
+	}
+
+	void align()
+	{
+		if (aligned) return;
+		seqan::DnaString _hapSeq(hap_ptr->seq);
+		seqan::DnaString _readSeq(read_ptr->seq.seq);
+		alignResult=MyAlign();
+
+		seqan::resize(seqan::rows(alignResult), 2);
+		seqan::assignSource(seqan::row(alignResult, 0), _hapSeq);
+		seqan::assignSource(seqan::row(alignResult, 1), _readSeq);
+
+		stringstream os;
+		os << seqan::globalAlignment(alignResult, score) << endl;
+		ml.ll=atof(os.str().c_str());
+
+
+		if (DEBUGCONVERTALIGNMENT) {
+			cout << alignResult << endl;
+		}
+
+
+		convertAlignment(alignResult, ml, length(_hapSeq), length(_readSeq), *hap_ptr, *read_ptr);
+
+		reportVariants();
+		aligned=true;
+	}
+	const MLAlignment &  getMLAlignment() { align(); return ml; }
+
+protected:
+	void reportVariants()
+	{
+		const Haplotype & hap = *hap_ptr;
+		for (map<int,AlignedVariant>::const_iterator it=hap.indels.begin();it!=hap.indels.end();it++) {
+			const AlignedVariant & av=it->second;
+			if (av.isCovered(params.padCover, ml.firstBase, ml.lastBase)) ml.hapIndelCovered[it->first]=true; else ml.hapIndelCovered[it->first]=false;
+		}
+		for (map<int,AlignedVariant>::const_iterator it=hap.snps.begin();it!=hap.snps.end();it++) {
+			const AlignedVariant & av=it->second;
+			if (av.isCovered(params.padCover, ml.firstBase, ml.lastBase)) ml.hapSNPCovered[it->first]=true; else ml.hapSNPCovered[it->first]=false;
+		}
+	}
+
+	typedef seqan::Align<seqan::DnaString, seqan::ArrayGaps> MyAlign;
+	MyAlign alignResult;
+	ObservationModelParameters params;
+	const Haplotype *hap_ptr;
+	const Read *read_ptr;
+	uint32_t hapStart;
+
+
+	MLAlignment ml;
+	seqan::Score<int> score;
+	bool aligned;
+
+};
+
+#endif /* OBSERVATIONMODELSEQAN_HPP_ */
diff --git a/OutputData.hpp b/OutputData.hpp
new file mode 100644
index 0000000..ae693eb
--- /dev/null
+++ b/OutputData.hpp
@@ -0,0 +1,121 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * OutputData.hpp
+ *
+ *  Created on: Sep 7, 2009
+ *      Author: caa
+ */
+
+#ifndef OUTPUTDATA_HPP_
+#define OUTPUTDATA_HPP_
+#include <vector>
+#include "StringHash.hpp"
+#include <string>
+#include <iostream>
+#include "foreach.hpp"
+using namespace std;
+using namespace boost;
+
+class OutputData
+{
+	friend class Line;
+public:
+	OutputData(ostream &_out) { out = &_out; numLines=0;};
+
+	OutputData(ostream & _out, const vector<string> & _columnLabels)
+	{
+		out=&_out;
+		BOOST_FOREACH(string label, _columnLabels) (*this)(label);
+		numLines=0;
+	}
+	OutputData & operator() (const string & label)
+	{
+		HashIt it=labelToColumn.find(label);
+		if (it!=labelToColumn.end()) throw string("Duplicate label ").append(label);
+		labelToColumn[label]=int(labels.size());
+		labels.push_back(label);
+		return *this;
+	}
+	string headerString() const
+	{
+		stringstream out;
+		if (labels.size()>0) {
+			out << labels[0];
+			for (size_t x=1;x<labels.size();x++) out << " " << labels[x];
+		}
+		return out.str();
+	}
+	template<class T> void outputLine(T x)
+	{
+		*out << x << endl;
+	}
+	class Line
+	{
+	public:
+		Line(const OutputData  & od) : lineData(od.labelToColumn.size(),"NA")
+		{
+			labelToColumnPtr = & od.labelToColumn;
+
+		}
+		string get(const string & columnLabel) const
+		{
+			string_hash<int>::const_iterator it=labelToColumnPtr->find(columnLabel);
+			if (it != labelToColumnPtr->end()) {
+				return lineData[it->second];
+			} else throw string("Column label ").append(columnLabel).append(" not found!");
+		}
+		template<class T> Line & set(const string & columnLabel, T x)
+		{
+			string_hash<int>::const_iterator it=labelToColumnPtr->find(columnLabel);
+			if (it != labelToColumnPtr->end()) {
+				stringstream os;
+				os << x;
+				lineData[it->second]=os.str();
+			} else throw string("Column label ").append(columnLabel).append(" not found!");
+			return *this;
+		}
+		string toString() const
+		{
+			stringstream out;
+			if (lineData.size()>0) {
+				out << lineData[0];
+				for (size_t x=1;x<lineData.size();x++) out << " " << lineData[x];
+			}
+			return out.str();
+		}
+		vector<string> lineData;
+	protected:
+		const string_hash<int> *labelToColumnPtr;
+	};
+	void output(const OutputData::Line & line)
+	{
+		numLines++;
+		*out << line.toString() << endl;
+	}
+
+	ostream *out;
+protected:
+	typedef string_hash<int>::iterator HashIt;
+	string_hash<int> labelToColumn;
+
+	vector<string> labels;
+	int numLines;
+};
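+/*
+   Illustrative usage sketch (not part of the original upstream source; the
+   column names and values are hypothetical):
+
+	OutputData od(cout);
+	od("chr")("pos")("type");
+	cout << od.headerString() << endl;
+
+	OutputData::Line line(od);
+	line.set("chr", "20").set("pos", 1000).set("type", "+A");
+	od.output(line);   // prints: 20 1000 +A
+*/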
+
+
+
+
+#endif /* OUTPUTDATA_HPP_ */
diff --git a/README b/README
new file mode 100644
index 0000000..f10cc5f
--- /dev/null
+++ b/README
@@ -0,0 +1,15 @@
+The source code for Dindel itself is released under GPL3.
+
+In this directory you will find parts of the Boost library (www.boost.org) and
+parts of the SeqAn library (www.seqan.de). You will still need to install a copy
+of the Boost library for program_options to compile. The parts of the Boost
+library bundled here keep Dindel compatible with older versions of Boost that,
+for instance, did not yet ship 'foreach.hpp'. The SeqAn library is included
+because Dindel uses its Needleman-Wunsch algorithm to align candidate
+haplotypes to the reference sequence.
+
+You will need to download the source code for SAMtools in order to compile
+Dindel. The path to the SAMtools source files can be specified in the Makefile.
+
+If all libraries are in place, simply type "make" at the command line to compile
+Dindel.
diff --git a/Read.hpp b/Read.hpp
new file mode 100644
index 0000000..851518a
--- /dev/null
+++ b/Read.hpp
@@ -0,0 +1,451 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef READ_HPP_
+#define READ_HPP_
+#include <cmath>
+#include "Haplotype.hpp"
+#include "bam.h"
+#include "Library.hpp"
+#include <vector>
+#include <algorithm>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <ext/hash_map>
+#include "MyBam.hpp"
+
+using namespace std;
+
+class Read
+{
+
+public:
+	class FetchReadData
+	{
+	public:
+		FetchReadData(vector<Read> * _reads,int _poolID, const LibraryCollection * _libraries, vector<MyBam *> * _myBams, int _numReads = 0, int _maxNumReads = 100000)
+		{
+			reads=_reads;
+			poolID=_poolID;
+			libraries=_libraries;
+			myBams = _myBams;
+			numUnknownLib=0;
+			numReads = _numReads;
+			maxNumReads = _maxNumReads;
+		}
+		vector<Read> * reads;
+		vector<MyBam *> * myBams;
+		int poolID;
+		const LibraryCollection * libraries;
+		int numUnknownLib;
+		int numReads;
+		int maxNumReads;
+		string_hash<int> unknownLib;
+	};
+
+	Read()
+	{
+		init(0);
+	}
+	Read(uint32_t _pos)
+	{
+		init(_pos);
+	}
+	void init(uint32_t _pos)
+	{
+		pos=_pos;
+		posStat.first=double(_pos);
+		posStat.second=1.0;
+		initBam=false;
+		onReverseStrand=false;
+		poolID=-1;
+		mateLen = -1;
+		matePos = -1;
+		library = NULL;
+	}
+	Read(const Read & r)
+	{
+		initBam=false;
+		copy(r,0);
+	}
+	Read(const Read & r, int poolID)
+	{
+		initBam=false;
+		copy(r,0);
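+		// note: the poolID argument is not used here; the pool ID is copied from r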
+	}
+	void copy(const Read &r, int x)
+	{
+	 	 seq=r.seq;
+		 pos=r.pos;
+		 qual=r.qual;
+		 posStat=r.posStat;
+		 mapQual=r.mapQual;
+		 onReverseStrand=r.onReverseStrand;
+		 poolID=r.poolID;
+		 matePos = r.matePos;
+		 mateLen = r.mateLen;
+		 library = r.library;
+		 bamHeader = r.bamHeader;
+		if (initBam) {
+			delete[] bam->data;
+			delete bam;
+			initBam=false;
+		}
+
+		 if (r.initBam) {
+ 			bam=new bam1_t;
+			*bam=*r.bam;
+			bam->data=new uint8_t[r.bam->m_data];
+			bam->m_data=r.bam->m_data;
+			for (int m=0;m<r.bam->m_data;m++) bam->data[m]=r.bam->data[m];
+			initBam=true;
+		}
+	}
+	Read & operator=(const Read & r)
+	{
+		if (&r!=this) {
+			copy(r,1);
+		}
+		return *this;
+	}
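+	// Construct a Read from a raw samtools BAM record: converts mapping and base
+	// Phred scores to probabilities, deep-copies the bam1_t record, and looks up
+	// the read's library in the supplied LibraryCollection.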
+	Read(const bam1_t *b, const LibraryCollection & libraries, int _poolID, bam_header_t * _bamHeader, const string & overrideLibName = string("") )
+	{
+		const bam1_core_t *c=&b->core;
+		uint32_t len=c->l_qseq;
+		double mapPhred=(double) c->qual;
+		mapQual=(1.0-pow(10.0, -mapPhred/10.0));
+		if (mapQual<0.0 || mapQual>1.0 || isnan(mapQual) || isinf(mapQual)) throw string("Phred error.");
+		if (mapQual<1e-16) mapQual=1e-16;
+		if (mapQual>1-1e-16) mapQual=1-1e-16;
+
+		// cout << "mapPhred: " << mapPhred << " qmap: " << qmap << endl;
+
+		pos=c->pos;
+
+		seq.reserve(len);
+		qual.reserve(len);
+		for(size_t x=0;x<len;x++) {
+			seq+=( bam_nt16_rev_table[ bam1_seqi(bam1_seq(b), x) ] );
+
+			// convert phred to probability
+			double basePhred=(double)  ( ( (uint8_t*) bam1_qual(b))[x] );
+			double q=(1.0-pow(10.0, -basePhred/10.0));
+			if (q<0.0 || q>1.0 ||isnan(q) || isinf(q)) throw string("Phred error.");
+			if (q<1e-16) q=1e-16;
+			if (q>1.0-1e-16) q=1.0-1e-16;
+			qual.push_back( q ); // probability that the base call is correct
+		}
+
+		posStat=computePositionStatistics(b);
+
+		bam = new bam1_t;
+		*bam=*b;
+		bam->data=new uint8_t[b->m_data];
+		bam->m_data=b->m_data;
+		for (int m=0;m<b->m_data;m++) bam->data[m]=b->data[m];
+		initBam=true;
+
+		if (bam->core.flag & BAM_FREVERSE)  onReverseStrand=true; else onReverseStrand=false;
+		poolID=_poolID;
+		matePos = bam->core.mpos;
+		mateLen = -1;
+
+		this->bamHeader = _bamHeader;
+		LibraryCollection::const_iterator it;
+		if (overrideLibName.empty()) {
+			it = libraries.find( this->getLibraryName() );
+		} else {
+			it = libraries.find( overrideLibName );
+		}
+
+		if (it == libraries.end()) {
+			deleteBam();
+			initBam = false;
+			throw string("Cannot find library: ").append(this->getLibraryName());
+		} else {
+			library = (const Library *) & (it->second);
+		}
+	}
+	uint32_t getEndPos() const
+	{
+		return bam->core.n_cigar? bam_calend(&bam->core, bam1_cigar(bam)) : bam->core.pos + 1;
+	}
+	string getLibraryName() const
+	{
+		if (this->isPaired()) {
+			const char *p = bam_get_library((bam_header_t *) this->bamHeader, this->bam);
+			if (p) {
+				return string(p);
+			} else {
+				return string("dindel_default");
+			}
+		} else {
+			return string("single_end");
+		}
+	}
+
+	int32_t getBAMMatePos() const { return bam->core.mpos; }
+	bool isUnmapped() const { return (bam->core.flag & BAM_FUNMAP) != 0 ; }
+	bool mateIsUnmapped() const { return (bam->core.flag & BAM_FMUNMAP) != 0; }
+	bool mateIsReverse() const { return (bam->core.flag & BAM_FMREVERSE) != 0; }
+	bool isReverse() const { return (bam->core.flag & BAM_FREVERSE) != 0; }
+	bool isPaired() const { return (bam->core.flag & BAM_FPAIRED) != 0; }
+	void complement()
+	{
+		for (size_t s=0;s<this->seq.seq.size();s++) {
+			char & nuc = this->seq.seq[s];
+			if (nuc == 'A') nuc = 'T';
+			else if (nuc == 'T') nuc = 'A';
+			else if (nuc == 'C') nuc = 'G';
+			else if (nuc == 'G') nuc = 'C';
+		}
+	}
+	void reverse()
+	{
+		string newseq = this->seq.seq;
+		size_t len = newseq.size();
+		for (size_t x=0;x<newseq.size();x++) newseq[len-x-1]=this->seq.seq[x];
+		this->seq.seq = newseq;
+	}
+
+	string getAuxData() const
+	{
+		stringstream os;
+		uint8_t *s = bam1_aux(bam);
+
+		while (s < bam->data + bam->data_len) {
+			uint8_t type, key[2];
+			key[0] = s[0]; key[1] = s[1];
+			s += 2; type = *s; ++s;
+			//printf("\t%c%c:", key[0], key[1]);
+			os << "\t" << key[0] << key[1];
+			/*
+			if (type == 'A') { printf("A:%c", *s); ++s; }
+			else if (type == 'C') { printf("i:%u", *s); ++s; }
+			else if (type == 'c') { printf("i:%d", *s); ++s; }
+			else if (type == 'S') { printf("i:%u", *(uint16_t*)s); s += 2; }
+			else if (type == 's') { printf("i:%d", *(int16_t*)s); s += 2; }
+			else if (type == 'I') { printf("i:%u", *(uint32_t*)s); s += 4; }
+			else if (type == 'i') { printf("i:%d", *(int32_t*)s); s += 4; }
+			else if (type == 'f') { printf("f:%g", *(float*)s); s += 4; }
+			else if (type == 'Z' || type == 'H') { printf("%c:", type); while (*s) putchar(*s++); ++s; }
+			*/
+			if (type == 'A') { os << "A:"<<(char)*s; ++s; }
+			else if (type == 'C') { os << "i:" << (unsigned int) *s; ++s; }
+			else if (type == 'c') { os << "i:" << (int) *s; ++s; }
+			else if (type == 'S') { os << "i:" << *(uint16_t*)s; s += 2; }
+			else if (type == 's') { os << "i:" << *(int16_t*)s; s += 2; }
+			else if (type == 'I') { os << "i:" << *(uint32_t*)s; s += 4; }
+			else if (type == 'i') { os << "i:" <<  *(int32_t*)s; s += 4; }
+			else if (type == 'f') { os << "f:" << *(float*)s; s += 4; }
+			else if (type == 'Z' || type == 'H') { os << type <<":"; while (*s) os << (char) (*s++); ++s; }
+		}
+		return os.str();
+	}
+
+	const Library & getLibrary() const { return *this->library; };
+
+	// compute mean and standard deviation of first base position
+	static pair<double, double> computePositionStatistics(const bam1_t *b)
+	{
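+		// For each CIGAR match block, accumulate the offset (pos - totLen) built up
+		// by the deletion/clipping operations preceding it, weighted by the block
+		// length; return the length-weighted mean of these offsets added to the
+		// alignment start position, together with the corresponding variance
+		// ((-1,-1) if the record has no CIGAR).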
+		const bam1_core_t *c=&b->core;
+		uint32_t *cigar=bam1_cigar(b);
+		uint32_t k;
+		int32_t pos=0, mean=0,totLen=0;
+
+		uint32_t refPos = c->pos;
+		double var=0.0;
+		if (c->n_cigar==0) {
+			return pair<double,double>(-1.0,-1.0);
+		}
+
+		for (k = 0; k < c->n_cigar; ++k) {
+			int op = cigar[k] & BAM_CIGAR_MASK;
+			int32_t len=cigar[k] >> BAM_CIGAR_SHIFT;
+
+			if (op==BAM_CMATCH) {
+				mean+=len*(pos-totLen);
+				totLen+=len;
+			}
+			// update position for the next cigar
+			if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CSOFT_CLIP || op ==BAM_CHARD_CLIP) {
+				pos+=len;
+			}
+		}
+		double dmean=double(mean)/double(totLen);
+
+		pos=0;
+		totLen=0;
+		for (k = 0; k < c->n_cigar; ++k) {
+			int op = cigar[k] & BAM_CIGAR_MASK;
+			int32_t len=cigar[k] >> BAM_CIGAR_SHIFT;
+
+			if (op==BAM_CMATCH) {
+				var+=double(len)*(double(pos-totLen)-dmean)*(double(pos-totLen)-dmean);
+				totLen+=len;
+			}
+			// update position for the next cigar
+			if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CSOFT_CLIP || op ==BAM_CHARD_CLIP) {
+				pos+=len;
+			}
+		}
+		var=var/double(totLen);
+		return pair<double,double>(dmean+double(refPos), var);
+	}
+	/*
+	static void filterReads(vector<Read> & reads, size_t max)
+	{
+		// filter using map quality
+		class SortFunc {
+		public:
+			static bool sortFunc(const Read & r1, const Read & r2)
+			{
+				// sort in decreasing order
+				if (r1.mapQual>r2.mapQual) return true; else return false;
+			}
+		};
+		sort(reads.begin(), reads.end(), SortFunc::sortFunc);
+		//reads.resize(max);
+		vector<Read> filteredReads;
+		for (size_t i=0;i<reads.size() && i<max ;i++) {
+			filteredReads[i]=reads[i];
+		}
+		reads.swap(filteredReads);
+	}
+	static void filterReads(vector<Read> & reads, size_t maxNum, double minMapQual, size_t maxReadLength)
+	{
+		vector<Read> filteredReads;
+		if (minMapQual<0.0) minMapQual=0.0;
+		// filter using map quality
+		class SortFunc {
+		public:
+			static bool sortFunc(const Read & r1, const Read & r2)
+			{
+				// sort in decreasing order
+				if (r1.mapQual>r2.mapQual) return true; else return false;
+			}
+		};
+		for (size_t r=0;r<reads.size();r++) {
+			const bam1_core_t *c=&(reads[r].bam->core);
+			if (reads[r].size()>maxReadLength || c->n_cigar==0) reads[r].mapQual=-1.0;
+
+		}
+
+		sort(reads.begin(), reads.end(), SortFunc::sortFunc);
+		size_t max; for (max=0;max<maxNum && max<reads.size();max++) if (!(reads[max].mapQual<minMapQual)) filteredReads.push_back(Read(reads[max])); else break;
+		reads.swap(filteredReads);
+	}
+	*/
+	static void filterReads(vector<Read> & reads, int exclude, const string & match)
+	{
+		vector<Read> filteredReads;
+		for (size_t r=0;r<reads.size();r++) {
+			string str=reads[r].getAuxData();
+			size_t found=str.find(match);
+			if (exclude) {
+				if (found==string::npos) {
+					filteredReads.push_back(reads[r]);
+				}
+			} else {
+				// include if match
+				if (found!=string::npos) filteredReads.push_back(reads[r]);
+			}
+		}
+		reads.swap(filteredReads);
+	}
+	/*
+	static int fetchFuncVector(const bam1_t *b, void *data)
+	{
+		FetchReadData *ptr=(FetchReadData *) data;
+
+		if (!( (b->core.flag & BAM_FDUP) || (b->core.flag & BAM_FQCFAIL) )) {
+			ptr->reads->push_back(Read(b, *(ptr->libraries)));
+		}
+		return 0;
+	}
+	*/
+
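+	// bam_fetch callback: appends every read that is neither a duplicate nor a
+	// QC failure to the read pool. Reads whose library cannot be found are
+	// counted and added with the default "single_end" library instead; an
+	// exception is thrown once more than maxNumReads have been collected.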
+	static int fetchFuncVectorPooled(const bam1_t *b, void *data)
+	{
+		FetchReadData *ptr=(FetchReadData *) data;
+
+		if (!( (b->core.flag & BAM_FDUP) || (b->core.flag & BAM_FQCFAIL) )) {
+			try {
+				ptr->reads->push_back(Read(b, *(ptr->libraries), ptr->poolID, (*(ptr->myBams))[ptr->poolID]->bh));
+				ptr->numReads++;
+			} catch (string s) {
+				if (s.find("Cannot find library")!=string::npos) {
+					string lib = s.substr(21, s.size()-21);
+					string_hash<int>::iterator _it = ptr->unknownLib.find(lib);
+					if (_it == ptr->unknownLib.end()) {
+						 ptr->unknownLib[lib] = 0;
+					} else _it->second++;
+				}
+				ptr->numUnknownLib++;
+				ptr->reads->push_back(Read(b, *(ptr->libraries), ptr->poolID, (*(ptr->myBams))[ptr->poolID]->bh, string("single_end")));
+				ptr->numReads++;
+			}
+		}
+		if (ptr->numReads > ptr->maxNumReads) {
+			throw string("Too many reads in region");
+		}
+		if (ptr->numReads % 10000 == 9999) cout << "numreads: " << ptr->numReads << endl;
+		return 0;
+	}
+
+	friend ostream &operator<<(ostream &stream, const Read & read)
+	{
+		stream << "pos: " << read.pos << " 1-mapping quality: " << 1.0-read.mapQual << " ";
+		for (size_t b=0;b<read.seq.size();b++) stream << read.seq[b];
+		for (size_t b=0;b<read.qual.size();b++) stream << " " << read.qual[b];
+		return stream;
+	};
+	bam1_t * getBam() const { return bam; };
+
+	size_t size() const { return seq.size(); };
+	void setAllQual(double v) { qual.clear(); qual.reserve(seq.size()); for (size_t x=0;x<seq.size();x++) qual.push_back(v); };
+
+
+	Haplotype seq;
+	vector<double> qual;
+	pair<double,double> posStat;
+	// offset of read with respect to some reference position
+
+	int32_t pos, matePos, mateLen;
+
+	double mapQual;
+	bool initBam;
+	bool onReverseStrand;
+	int poolID;
+
+	bam_header_t * bamHeader;
+	const Library * library; // pointer to library this read was generated from
+
+	bam1_t *bam;
+
+	void deleteBam()
+	{
+		delete[] bam->data; delete bam;
+	}
+
+	~Read()
+	{
+		if (initBam) {
+			deleteBam();
+		}
+	}
+};
+
+#endif /*READ_HPP_*/
diff --git a/ReadIndelErrorModel.hpp b/ReadIndelErrorModel.hpp
new file mode 100644
index 0000000..fced785
--- /dev/null
+++ b/ReadIndelErrorModel.hpp
@@ -0,0 +1,57 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * ReadIndelErrorModel.hpp
+ *
+ *  Created on: Oct 12, 2009
+ *      Author: caa
+ */
+
+#ifndef READINDELERRORMODEL_HPP_
+#define READINDELERRORMODEL_HPP_
+
+#include <vector>
+
+using namespace std;
+
+class ReadIndelErrorModel
+{
+public:
+	ReadIndelErrorModel()
+	{
+		double hp[] = { 2.9e-5, 2.9e-5,2.9e-5, 2.9e-5, 4.3e-5, 1.1e-4, 2.4e-4, 5.7e-4, 1.0e-3, 1.4e-3 };
+		baselineProbs = vector<double>(10,0.0);
+		for (int x=0;x<10;x++) {
+			baselineProbs[x]=hp[x];
+		}
+	}
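+	// Indel error probability for a homopolymer run of length hpLen: the baseline
+	// rate (tabulated for lengths 1-10, extrapolated linearly beyond that) is
+	// scaled by the run length and capped at 0.99.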
+	double getViterbiHPError(int hpLen)
+	{
+		int len=hpLen;
+		if (len<1) len=1;
+		double pbe;
+		if (len<=10) {
+			pbe=baselineProbs[len-1];
+		} else {
+			pbe=baselineProbs[9]+4.3e-4*double(len-10);
+		}
+		pbe *= double(hpLen);
+		if (pbe>0.99) pbe = 0.99;
+		return pbe;
+
+	}
+private:
+	vector<double> baselineProbs;
+
+};
+
+
+#endif /* READINDELERRORMODEL_HPP_ */
diff --git a/StringHash.hpp b/StringHash.hpp
new file mode 100644
index 0000000..b8d27cf
--- /dev/null
+++ b/StringHash.hpp
@@ -0,0 +1,40 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * StringHash.hpp
+ *
+ *  Created on: Sep 7, 2009
+ *      Author: caa
+ */
+
+#ifndef STRINGHASH_HPP_
+#define STRINGHASH_HPP_
+#include <vector>
+#include <ext/hash_map>
+#include <string>
+#include <iostream>
+#include "foreach.hpp"
+using namespace std;
+using namespace boost;
+using __gnu_cxx::hash;
+  
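+// string_hash<T>: a hash_map keyed by std::string, built on the GNU hash_map
+// extension with a hash of the underlying C string.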
+struct my_hash_funct : public unary_function<string, size_t>
+{	
+	size_t operator()(const string & x) const { return hash<const char*>() (x.c_str()); }
+};
+
+template<class T>  class string_hash : public hash_map<string, T, my_hash_funct> {};
+
+#endif /* STRINGHASH_HPP_ */
diff --git a/Utils.hpp b/Utils.hpp
new file mode 100644
index 0000000..1b4e41b
--- /dev/null
+++ b/Utils.hpp
@@ -0,0 +1,51 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Utils.hpp
+ *
+ *  Created on: Mar 11, 2009
+ *      Author: caa
+ */
+
+#ifndef UTILS_HPP_
+#define UTILS_HPP_
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <cmath>
+using namespace std;
+
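+// addLogs: computes log(exp(l1) + exp(l2)) in a numerically stable way
+// (log-sum-exp), factoring out the larger of the two arguments.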
+inline double addLogs(const double l1, const double l2)
+{
+	if (l1>l2) {
+		double diff=l2-l1;
+		return l1+log(1.0+exp(diff));
+	} else {
+		double diff=l1-l2;
+		return l2+log(1.0+exp(diff));
+	}
+}
+
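+// from_string: parses s into t using the supplied stream manipulator
+// (e.g. std::dec); returns true if the conversion succeeded.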
+template <class T>
+bool from_string(T& t,
+                 const std::string& s,
+                 std::ios_base& (*f)(std::ios_base&))
+{
+  std::istringstream iss(s);
+  return !(iss >> f >> t).fail();
+}
+
+
+
+#endif /* UTILS_HPP_ */
diff --git a/Variant.hpp b/Variant.hpp
new file mode 100644
index 0000000..36da630
--- /dev/null
+++ b/Variant.hpp
@@ -0,0 +1,179 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Variant.hpp
+ *
+ *  Created on: Aug 27, 2009
+ *      Author: caa
+ */
+
+#ifndef VARIANT_HPP_
+#define VARIANT_HPP_
+#include <string>
+#include <vector>
+
+using namespace std;
+
+
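+// Variant encodes a single variant as a string: "+SEQ" is an insertion,
+// "-SEQ" a deletion, "X=>Y" a SNP and "*REF" the reference allele.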
+class Variant
+{
+public:
+	Variant() {};
+	Variant(const string & _str) { initFromString(_str); }
+
+	int size() const { return length; };
+	typedef enum { INS, DEL, SNP, REF } Type;
+	const string & getString() const { return str; };
+	const string & getSeq() const { return seq; }
+	Type getType() const { return type; }
+	bool isIndel() const { if (type==INS || type==DEL) return true; else return false; };
+	bool isSNP() const { if (type==SNP) return true; else return false; };
+	bool isRef() const { if (type==REF) return true; else return false; };
+protected:
+	void initFromString(const string & str)
+	{
+		int ok=1;
+		if (str.size()>1) {
+			if (str[0]=='-') {
+				// deletion
+				length=int(str.size())-1;
+				seq=str.substr(1, length);
+				type=Variant::DEL;
+			} else if (str[0]=='+') {
+				length=(int(str.size())-1);
+				seq=str.substr(1, length);
+				type=Variant::INS;
+			} else if (str.size()==4 && str[1]=='=' && str[2]=='>') {
+					type=Variant::SNP;
+					seq=str;
+					length=1;
+			} else if (str=="*REF") {
+					type=Variant::REF;
+					seq=string("*REF");
+					length=1;
+			} else ok=0;
+		} else ok=0;
+		if (!ok) { cout << "input string: " << str << endl; throw string("Unrecognized variant"); }
+		this->str=str;
+	}
+	Type type;
+	string seq;
+	string str;
+	int length;
+};
+
+
+class AlignedVariant : public Variant
+{
+public:
+	AlignedVariant() {};
+	AlignedVariant(const string & _str, int _startHap, int _endHap, int _startRead, int _endRead)
+	{
+		initFromString(_str);
+		startHap=_startHap;
+		endHap=_endHap;
+		startRead=_startRead;
+		endRead=_endRead;
+
+		leftFlankHap = startHap;
+		leftFlankRead = startRead;
+
+		rightFlankHap = endHap;
+		rightFlankRead = endRead;
+
+
+		freq = -1.0;
+		addComb = false;
+	}
+	AlignedVariant(const string & _str, int canonicalPos, double _freq=-1.0, bool _addComb = false)
+	{
+		initFromString(_str);
+		startHap = canonicalPos;
+		if (type==DEL) {
+			endHap = startHap+length-1;
+		} else {
+			endHap = startHap;
+		}
+		startRead=-1;
+		endRead = -1;
+
+		leftFlankHap = startHap;
+		leftFlankRead = startRead;
+
+		rightFlankHap = endHap;
+		rightFlankRead = endRead;
+
+
+		freq=_freq;
+		addComb = _addComb;
+	}
+
+
+
+	bool isCovered(int pad, int firstBase, int lastBase) const
+	{
+		if (firstBase+pad<=startRead && lastBase-pad>=endRead) return true; else return false;
+	}
+
+	bool operator<(const AlignedVariant & v) const
+	{
+		if (startHap!=v.startHap) return startHap<v.startHap; else return this->getString()<v.getString();
+	}
+	bool isEqual(int pos, int type, const string & str) const {
+		if (this->type == type && this->startHap == pos) {
+			if (type == AlignedVariant::SNP) {
+				if (str.substr(1,3)==this->str.substr(1,3)) return true; else return false;
+			} else {
+				if (type == AlignedVariant::INS) {
+					if (this->getString() == str) return true; else return false;
+				} else if (type == AlignedVariant::DEL) {
+					if (this->getString().size()==str.size()) return true; else return false;
+				}
+			}
+		} else return false;
+		return false;
+	}
+	int getStartRead() const { return startRead; };
+	int getStartHap() const { return startHap; };
+	int getEndHap() const { return endHap; };
+	double getFreq() const  { return freq; };
+	bool getAddComb() const { return addComb; };
+
+	int getLeftFlankHap() const { return leftFlankHap; }
+	int getRightFlankHap() const { return rightFlankHap; }
+	int getLeftFlankRead() const { return leftFlankRead; }
+	int getRightFlankRead() const { return rightFlankRead; }
+
+	void setFlanking(int _leftFlankHap, int _rightFlankHap, int _leftFlankRead, int _rightFlankRead)
+	{
+		leftFlankRead  = _leftFlankRead;
+		rightFlankRead = _rightFlankRead;
+
+		leftFlankHap  = _leftFlankHap;
+		rightFlankHap = _rightFlankHap;
+	}
+
+protected:
+	int startHap, endHap;     // position of variant in the haplotype the read is aligned to.
+	int startRead, endRead;   // position of variant in the read aligned to the haplotype
+	int leftFlankHap, rightFlankHap; // position of left and right base flanking the indel in the _haplotype_ (ie the target sequence)
+	int leftFlankRead, rightFlankRead; // position of left and right base flanking the indel in the _read_ (ie the sequence aligned to the target sequence)
+	double freq;
+	bool addComb;              // add combinatorially in generation of candidate haplotypes?
+};
+
+
+
+#endif /* VARIANT_HPP_ */
diff --git a/VariantFile.hpp b/VariantFile.hpp
new file mode 100644
index 0000000..2598fac
--- /dev/null
+++ b/VariantFile.hpp
@@ -0,0 +1,304 @@
+/*    
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * VariantFile.hpp
+ *
+ *  Created on: Sep 9, 2009
+ *      Author: caa
+ */
+
+#ifndef VARIANTFILE_HPP_
+#define VARIANTFILE_HPP_
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "Utils.hpp"
+#include "Variant.hpp"
+using namespace std;
+
+/*
+ * class used as input for realignment
+ */
+
+class AlignedCandidates
+{
+public:
+	AlignedCandidates()
+	{
+		tid="";
+	}
+	AlignedCandidates(const string & _tid, const vector<AlignedVariant> & _variants, int _leftPos, int _rightPos)
+	{
+		tid=_tid;
+		variants=_variants;
+		leftPos = _leftPos;
+		rightPos = _rightPos;
+		computePositions();
+	}
+	vector<AlignedVariant> variants;
+	string tid;
+	int centerPos, leftPos, rightPos;
+	const AlignedVariant * findVariant(int pos, int type, const string & str) const
+	{
+		//cout << " ******** " << endl;
+		for (size_t x=0;x<variants.size();x++) {
+			//cout << "pos: " << pos << " variants[x].pos: " << variants[x].getStartHap() << " " << variants[x].getString() << endl;
+			if (variants[x].isEqual(pos, type, str)) return (const AlignedVariant *) &(variants[x]);
+		}
+	//	cout << " ******** " << endl;
+		return NULL;
+	}
+private:
+	void computePositions()
+	{
+		centerPos = leftPos+(rightPos-leftPos)/2;
+	}
+};
+
+
+class VariantFile
+{
+public:
+	class Candidates
+	{
+	public:
+		Candidates()
+		{
+			tid="";
+			pos=0;
+			prior=-1.0;
+		}
+		Candidates(const string & _tid, uint32_t _pos, double _prior, const vector<Variant> & _variants, const vector<double> & _freqs)
+		{
+			tid=_tid;
+			pos=_pos;
+			prior=_prior;
+			variants=_variants;
+			freqs=_freqs;
+		}
+		vector<Variant> variants;
+		vector<double> freqs;
+		double prior;
+		string tid;
+		uint32_t pos;
+	};
+
+
+public:
+	VariantFile(const string & fileName)
+	{
+		index=0;
+		isOpen=false;
+		open(fileName);
+	}
+
+	int open(const string & fileName)
+	{
+		fin.open(fileName.c_str());
+		if (!fin.is_open()) throw string("Cannot open variant file ").append(fileName);
+		isOpen=true;
+		return 0;
+	}
+
+	bool eof() { if (isOpen) return fin.eof(); else return true; };
+
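+	// Reads one candidate line of the form
+	//   <tid> <pos> <variant> [<variant> ...] [<prior> [<freq> ...]] [# comment]
+	// where each variant uses the "+SEQ" / "-SEQ" / "X=>Y" notation.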
+	Candidates getLine(bool isOneBased=false)
+	{
+		if (!isOpen) return empty;
+
+		uint32_t pos;
+		string tid;
+		double prior=-1.0;
+
+		string line;
+		getline(fin, line);
+		if (line.empty()) return empty;
+
+		istringstream is(line);
+
+		index++;
+
+		if (!is.eof()) is >> tid; else return empty;
+		if (!is.eof()) is >> pos; else return empty;
+
+		// convert to zero-based coordinates
+		if (isOneBased) pos--;
+
+		// get variants from line
+		vector<Variant> variants;
+		vector<double> freqs;
+
+		string col;
+
+		try {
+			while (!is.eof()) {
+				is >> col;
+				if ( col.size() && ( (col[0]!='-' && col[0] != '+' && col[0] != 'A' && col[0] != 'C' && col[0]!='G' && col[0]!='T' && col[0]!='R') ) ) break;
+				Variant variant(col);
+				if (variant.getSeq().size()!=0) variants.push_back(variant);
+			}
+		} catch (string err) {
+			cerr <<  "Could not parse variants in line " << index << " in variants file." << endl;
+			return empty;
+		}
+
+		if (col.find('#') != string::npos) return Candidates(tid, pos, prior, variants, freqs);
+
+		from_string<double>(prior, col, std::dec);
+
+		bool error=false;
+		while (!is.eof()) {
+			string in;
+			is >> in;
+
+			if (in.find('#')!=string::npos) break;
+			double freq;
+			if (!from_string<double>(freq, in, std::dec)) { error=true; break; };
+			freqs.push_back(freq);
+		}
+
+		if (error || (!error && freqs.size()>0 && freqs.size()!=variants.size())) {
+			freqs.clear();
+			cerr << "Could not parse all frequencies in line " << index << " in variants file." << endl;
+		}
+
+		if (variants.size()==0) {
+			cerr << "Could not parse any variants in line: " << index << " SKIPPING." << endl;
+			return empty;
+		}
+
+		return Candidates(tid, pos, prior, variants, freqs);
+	}
+
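+	// Reads one realignment-candidate line of the form
+	//   <tid> <leftPos> <rightPos> <pos,variant[,freq[,addComb]]> ... [# or % comment]
+	// with entries separated by whitespace and their fields by ',' or ';'.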
+	AlignedCandidates getLineVector(bool isOneBased=false)
+	{
+		if (!isOpen) return aligned_empty;
+
+		uint32_t pos;
+		int leftPos, rightPos;
+		string tid;
+
+		string line;
+		getline(fin, line);
+		if (line.empty()) return aligned_empty;
+
+		istringstream is(line);
+
+		index++;
+
+		if (!is.eof()) is >> tid; else return aligned_empty;
+		if (!is.eof()) {
+			string str;
+			is >> str;
+			if (!from_string<int>(leftPos, str, std::dec)) throw string("Cannot read left boundary of region.");
+
+		} else return aligned_empty;
+		if (!is.eof()) {
+			string str;
+			is >> str;
+			if (!from_string<int>(rightPos, str, std::dec)) throw string("Cannot read right boundary of region.");
+
+		} else return aligned_empty;
+
+		//cout << "leftPos: " << leftPos << " rightPos: " << rightPos << endl;
+
+		// get variants from line
+		vector<AlignedVariant> variants;
+		vector<double> freqs;
+
+		string col;
+
+		try {
+			while (!is.eof()) {
+				string pvf_str;
+				if (!is.eof()) is >> pvf_str;
+				//cout << "pvf_str: " << pvf_str << endl;
+
+				if (pvf_str.empty()) break;
+				if (pvf_str[0]=='#' || pvf_str[0]=='%') break;
+
+				vector<string> els;
+				int lastpos=0;
+				for (int x=0;x<int(pvf_str.size());x++) {
+					if ((pvf_str[x]==';' || pvf_str[x]==',') && x-lastpos>0) {
+						els.push_back(pvf_str.substr(lastpos,x-lastpos));
+					//	cout << "els " << x << " : " << els[els.size()-1] << endl;
+						lastpos=x+1;
+					}
+				}
+				els.push_back(pvf_str.substr(lastpos, pvf_str.size()-lastpos));
+
+				if (els.size()<2) {
+					cerr << "Error reading line in variantfile!\n";
+				} else {
+					double freq=-1.0;
+					bool addComb=false;
+					if (!from_string<uint32_t>(pos, els[0], std::dec)) throw string("Cannot read position");
+				// convert to zero-based coordinates
+					if (isOneBased) pos--;
+
+					string & col = els[1];
+					if ( col.size()==0 || ( (col[0]!='-' && col[0] != '+' && col[0] != 'A' && col[0] != 'C' && col[0]!='G' && col[0]!='T' && col[0]!='R') ) ) throw string("Unrecognized variant");
+
+					if (els.size()>2) {
+						if (!from_string<double>(freq, els[2],std::dec)) throw string("Cannot read prior/frequency");
+					}
+					if (els.size()>3) {
+						int addc;
+						if (!from_string<int>(addc, els[3],std::dec)) throw string("Cannot read add_combinatorial flag");
+						if (addc) addComb = true;
+					}
+
+					AlignedVariant variant(col,pos, freq, addComb);
+					if (variant.getSeq().size()!=0) {
+						variants.push_back(variant);
+					}
+				}
+				// split into pos, var, col
+
+
+			}
+		} catch (string err) {
+			cerr <<  "Could not parse variants in line " << index << " in variants file." << endl;
+			cerr <<  "Error: " << err << endl;
+			return aligned_empty;
+		}
+
+		if (variants.size()==0) {
+			cerr << "Could not parse any variants in line: " << index << " SKIPPING." << endl;
+			return aligned_empty;
+		}
+
+		return AlignedCandidates(tid, variants, leftPos, rightPos);
+	}
+
+	~VariantFile()
+	{
+		fin.close();
+	}
+
+protected:
+	ifstream fin;
+	bool isOpen;
+	Candidates empty;
+	AlignedCandidates aligned_empty;
+	int index;
+};
+
+
+#endif /* VARIANTFILE_HPP_ */
diff --git a/digamma.hpp b/digamma.hpp
new file mode 100644
index 0000000..a1008d2
--- /dev/null
+++ b/digamma.hpp
@@ -0,0 +1,450 @@
+//  (C) Copyright John Maddock 2006.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_SF_DIGAMMA_HPP
+#define BOOST_MATH_SF_DIGAMMA_HPP
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include <boost/math/tools/rational.hpp>
+#include <boost/math/tools/promotion.hpp>
+#include <boost/math/policies/error_handling.hpp>
+#include <boost/math/constants/constants.hpp>
+#include <boost/mpl/comparison.hpp>
+
+namespace boost{
+namespace math{
+namespace detail{
+//
+// Begin by defining the smallest value for which it is safe to
+// use the asymptotic expansion for digamma:
+//
+inline unsigned digamma_large_lim(const mpl::int_<0>*)
+{  return 20;  }
+
+inline unsigned digamma_large_lim(const void*)
+{  return 10;  }
+//
+// Implementations of the asymptotic expansion come next,
+// the coefficients of the series have been evaluated
+// in advance at high precision, and the series truncated
+// at the first term that's too small to effect the result.
+// Note that the series becomes divergent after a while
+// so truncation is very important.
+//
+// This first one gives 34-digit precision for x >= 20:
+//
+template <class T>
+inline T digamma_imp_large(T x, const mpl::int_<0>*)
+{
+   BOOST_MATH_STD_USING // ADL of std functions.
+   static const T P[] = {
+      0.083333333333333333333333333333333333333333333333333L,
+      -0.0083333333333333333333333333333333333333333333333333L,
+      0.003968253968253968253968253968253968253968253968254L,
+      -0.0041666666666666666666666666666666666666666666666667L,
+      0.0075757575757575757575757575757575757575757575757576L,
+      -0.021092796092796092796092796092796092796092796092796L,
+      0.083333333333333333333333333333333333333333333333333L,
+      -0.44325980392156862745098039215686274509803921568627L,
+      3.0539543302701197438039543302701197438039543302701L,
+      -26.456212121212121212121212121212121212121212121212L,
+      281.4601449275362318840579710144927536231884057971L,
+      -3607.510546398046398046398046398046398046398046398L,
+      54827.583333333333333333333333333333333333333333333L,
+      -974936.82385057471264367816091954022988505747126437L,
+      20052695.796688078946143462272494530559046688078946L,
+      -472384867.72162990196078431372549019607843137254902L,
+      12635724795.916666666666666666666666666666666666667L
+   };
+   x -= 1;
+   T result = log(x);
+   result += 1 / (2 * x);
+   T z = 1 / (x*x);
+   result -= z * tools::evaluate_polynomial(P, z);
+   return result;
+}
+//
+// 19-digit precision for x >= 10:
+//
+template <class T>
+inline T digamma_imp_large(T x, const mpl::int_<64>*)
+{
+   BOOST_MATH_STD_USING // ADL of std functions.
+   static const T P[] = {
+      0.083333333333333333333333333333333333333333333333333L,
+      -0.0083333333333333333333333333333333333333333333333333L,
+      0.003968253968253968253968253968253968253968253968254L,
+      -0.0041666666666666666666666666666666666666666666666667L,
+      0.0075757575757575757575757575757575757575757575757576L,
+      -0.021092796092796092796092796092796092796092796092796L,
+      0.083333333333333333333333333333333333333333333333333L,
+      -0.44325980392156862745098039215686274509803921568627L,
+      3.0539543302701197438039543302701197438039543302701L,
+      -26.456212121212121212121212121212121212121212121212L,
+      281.4601449275362318840579710144927536231884057971L,
+   };
+   x -= 1;
+   T result = log(x);
+   result += 1 / (2 * x);
+   T z = 1 / (x*x);
+   result -= z * tools::evaluate_polynomial(P, z);
+   return result;
+}
+//
+// 17-digit precision for x >= 10:
+//
+template <class T>
+inline T digamma_imp_large(T x, const mpl::int_<53>*)
+{
+   BOOST_MATH_STD_USING // ADL of std functions.
+   static const T P[] = {
+      0.083333333333333333333333333333333333333333333333333L,
+      -0.0083333333333333333333333333333333333333333333333333L,
+      0.003968253968253968253968253968253968253968253968254L,
+      -0.0041666666666666666666666666666666666666666666666667L,
+      0.0075757575757575757575757575757575757575757575757576L,
+      -0.021092796092796092796092796092796092796092796092796L,
+      0.083333333333333333333333333333333333333333333333333L,
+      -0.44325980392156862745098039215686274509803921568627L
+   };
+   x -= 1;
+   T result = log(x);
+   result += 1 / (2 * x);
+   T z = 1 / (x*x);
+   result -= z * tools::evaluate_polynomial(P, z);
+   return result;
+}
+//
+// 9-digit precision for x >= 10:
+//
+template <class T>
+inline T digamma_imp_large(T x, const mpl::int_<24>*)
+{
+   BOOST_MATH_STD_USING // ADL of std functions.
+   static const T P[] = {
+      0.083333333333333333333333333333333333333333333333333L,
+      -0.0083333333333333333333333333333333333333333333333333L,
+      0.003968253968253968253968253968253968253968253968254L
+   };
+   x -= 1;
+   T result = log(x);
+   result += 1 / (2 * x);
+   T z = 1 / (x*x);
+   result -= z * tools::evaluate_polynomial(P, z);
+   return result;
+}
+//
+// Now follow rational approximations over the range [1,2].
+//
+// 35-digit precision:
+//
+template <class T>
+T digamma_imp_1_2(T x, const mpl::int_<0>*)
+{
+   //
+   // Now the approximation, we use the form:
+   //
+   // digamma(x) = (x - root) * (Y + R(x-1))
+   //
+   // Where root is the location of the positive root of digamma,
+   // Y is a constant, and R is optimised for low absolute error
+   // compared to Y.
+   //
+   // Max error found at 128-bit long double precision:  5.541e-35
+   // Maximum Deviation Found (approximation error):     1.965e-35
+   //
+   static const float Y = 0.99558162689208984375F;
+
+   static const T root1 = 1569415565.0 / 1073741824uL;
+   static const T root2 = (381566830.0 / 1073741824uL) / 1073741824uL;
+   static const T root3 = ((111616537.0 / 1073741824uL) / 1073741824uL) / 1073741824uL;
+   static const T root4 = (((503992070.0 / 1073741824uL) / 1073741824uL) / 1073741824uL) / 1073741824uL;
+   static const T root5 = 0.52112228569249997894452490385577338504019838794544e-36L;
+
+   static const T P[] = {    
+      0.25479851061131551526977464225335883769L,
+      -0.18684290534374944114622235683619897417L,
+      -0.80360876047931768958995775910991929922L,
+      -0.67227342794829064330498117008564270136L,
+      -0.26569010991230617151285010695543858005L,
+      -0.05775672694575986971640757748003553385L,
+      -0.0071432147823164975485922555833274240665L,
+      -0.00048740753910766168912364555706064993274L,
+      -0.16454996865214115723416538844975174761e-4L,
+      -0.20327832297631728077731148515093164955e-6L
+   };
+   static const T Q[] = {    
+      1,
+      2.6210924610812025425088411043163287646L,
+      2.6850757078559596612621337395886392594L,
+      1.4320913706209965531250495490639289418L,
+      0.4410872083455009362557012239501953402L,
+      0.081385727399251729505165509278152487225L,
+      0.0089478633066857163432104815183858149496L,
+      0.00055861622855066424871506755481997374154L,
+      0.1760168552357342401304462967950178554e-4L,
+      0.20585454493572473724556649516040874384e-6L,
+      -0.90745971844439990284514121823069162795e-11L,
+      0.48857673606545846774761343500033283272e-13L,
+   };
+   T g = x - root1;
+   g -= root2;
+   g -= root3;
+   g -= root4;
+   g -= root5;
+   T r = tools::evaluate_polynomial(P, x-1) / tools::evaluate_polynomial(Q, x-1);
+   T result = g * Y + g * r;
+
+   return result;
+}
+//
+// 19-digit precision:
+//
+template <class T>
+T digamma_imp_1_2(T x, const mpl::int_<64>*)
+{
+   //
+   // Now the approximation, we use the form:
+   //
+   // digamma(x) = (x - root) * (Y + R(x-1))
+   //
+   // Where root is the location of the positive root of digamma,
+   // Y is a constant, and R is optimised for low absolute error
+   // compared to Y.
+   //
+   // Max error found at 80-bit long double precision:   5.016e-20
+   // Maximum Deviation Found (approximation error):     3.575e-20
+   //
+   static const float Y = 0.99558162689208984375F;
+
+   static const T root1 = 1569415565.0 / 1073741824uL;
+   static const T root2 = (381566830.0 / 1073741824uL) / 1073741824uL;
+   static const T root3 = 0.9016312093258695918615325266959189453125e-19L;
+
+   static const T P[] = {    
+      0.254798510611315515235L,
+      -0.314628554532916496608L,
+      -0.665836341559876230295L,
+      -0.314767657147375752913L,
+      -0.0541156266153505273939L,
+      -0.00289268368333918761452L
+   };
+   static const T Q[] = {    
+      1,
+      2.1195759927055347547L,
+      1.54350554664961128724L,
+      0.486986018231042975162L,
+      0.0660481487173569812846L,
+      0.00298999662592323990972L,
+      -0.165079794012604905639e-5L,
+      0.317940243105952177571e-7L
+   };
+   T g = x - root1;
+   g -= root2;
+   g -= root3;
+   T r = tools::evaluate_polynomial(P, x-1) / tools::evaluate_polynomial(Q, x-1);
+   T result = g * Y + g * r;
+
+   return result;
+}
+//
+// 18-digit precision:
+//
+template <class T>
+T digamma_imp_1_2(T x, const mpl::int_<53>*)
+{
+   //
+   // Now the approximation, we use the form:
+   //
+   // digamma(x) = (x - root) * (Y + R(x-1))
+   //
+   // Where root is the location of the positive root of digamma,
+   // Y is a constant, and R is optimised for low absolute error
+   // compared to Y.
+   //
+   // Maximum Deviation Found:               1.466e-18
+   // At double precision, max error found:  2.452e-17
+   //
+   static const float Y = 0.99558162689208984F;
+
+   static const T root1 = 1569415565.0 / 1073741824uL;
+   static const T root2 = (381566830.0 / 1073741824uL) / 1073741824uL;
+   static const T root3 = 0.9016312093258695918615325266959189453125e-19L;
+
+   static const T P[] = {    
+      0.25479851061131551L,
+      -0.32555031186804491L,
+      -0.65031853770896507L,
+      -0.28919126444774784L,
+      -0.045251321448739056L,
+      -0.0020713321167745952L
+   };
+   static const T Q[] = {    
+      1L,
+      2.0767117023730469L,
+      1.4606242909763515L,
+      0.43593529692665969L,
+      0.054151797245674225L,
+      0.0021284987017821144L,
+      -0.55789841321675513e-6L
+   };
+   T g = x - root1;
+   g -= root2;
+   g -= root3;
+   T r = tools::evaluate_polynomial(P, x-1) / tools::evaluate_polynomial(Q, x-1);
+   T result = g * Y + g * r;
+
+   return result;
+}
+//
+// 9-digit precision:
+//
+template <class T>
+inline T digamma_imp_1_2(T x, const mpl::int_<24>*)
+{
+   //
+   // Now the approximation, we use the form:
+   //
+   // digamma(x) = (x - root) * (Y + R(x-1))
+   //
+   // Where root is the location of the positive root of digamma,
+   // Y is a constant, and R is optimised for low absolute error
+   // compared to Y.
+   //
+   // Maximum Deviation Found:              3.388e-010
+   // At float precision, max error found:  2.008725e-008
+   //
+   static const float Y = 0.99558162689208984f;
+   static const T root = 1532632.0f / 1048576;
+   static const T root_minor = static_cast<T>(0.3700660185912626595423257213284682051735604e-6L);
+   static const T P[] = {    
+      0.25479851023250261e0,
+      -0.44981331915268368e0,
+      -0.43916936919946835e0,
+      -0.61041765350579073e-1
+   };
+   static const T Q[] = {    
+      0.1e1,
+      0.15890202430554952e1,
+      0.65341249856146947e0,
+      0.63851690523355715e-1
+   };
+   T g = x - root;
+   g -= root_minor;
+   T r = tools::evaluate_polynomial(P, x-1) / tools::evaluate_polynomial(Q, x-1);
+   T result = g * Y + g * r;
+
+   return result;
+}
+
+template <class T, class Tag, class Policy>
+T digamma_imp(T x, const Tag* t, const Policy& pol)
+{
+   //
+   // This handles reflection of negative arguments, and all our
+   // error handling, then forwards to the T-specific approximation.
+   //
+   BOOST_MATH_STD_USING // ADL of std functions.
+
+   T result = 0;
+   //
+   // Check for negative arguments and use reflection:
+   //
+   if(x < 0)
+   {
+      // Reflect:
+      x = 1 - x;
+      // Argument reduction for tan:
+      T remainder = x - floor(x);
+      // Shift to negative if > 0.5:
+      if(remainder > 0.5)
+      {
+         remainder -= 1;
+      }
+      //
+      // check for evaluation at a negative pole:
+      //
+      if(remainder == 0)
+      {
+         return policies::raise_pole_error<T>("boost::math::digamma<%1%>(%1%)", 0, (1-x), pol);
+      }
+      result = constants::pi<T>() / tan(constants::pi<T>() * remainder);
+   }
+   //
+   // If we're above the lower-limit for the
+   // asymptotic expansion then use it:
+   //
+   if(x >= digamma_large_lim(t))
+   {
+      result += digamma_imp_large(x, t);
+   }
+   else
+   {
+      //
+      // If x > 2 reduce to the interval [1,2]:
+      //
+      while(x > 2)
+      {
+         x -= 1;
+         result += 1/x;
+      }
+      //
+      // If x < 1 use recurrance to shift to > 1:
+      //
+      if(x < 1)
+      {
+         result = -1/x;
+         x += 1;
+      }
+      result += digamma_imp_1_2(x, t);
+   }
+   return result;
+}
+
+} // namespace detail
+
+template <class T, class Policy>
+inline typename tools::promote_args<T>::type 
+   digamma(T x, const Policy& pol)
+{
+   typedef typename tools::promote_args<T>::type result_type;
+   typedef typename policies::evaluation<result_type, Policy>::type value_type;
+   typedef typename policies::precision<T, Policy>::type precision_type;
+   typedef typename mpl::if_<
+      mpl::or_<
+         mpl::less_equal<precision_type, mpl::int_<0> >,
+         mpl::greater<precision_type, mpl::int_<64> >
+      >,
+      mpl::int_<0>,
+      typename mpl::if_<
+         mpl::less<precision_type, mpl::int_<25> >,
+         mpl::int_<24>,
+         typename mpl::if_<
+            mpl::less<precision_type, mpl::int_<54> >,
+            mpl::int_<53>,
+            mpl::int_<64>
+         >::type
+      >::type
+   >::type tag_type;
+
+   return policies::checked_narrowing_cast<result_type, Policy>(detail::digamma_imp(
+      static_cast<value_type>(x),
+      static_cast<const tag_type*>(0), pol), "boost::math::digamma<%1%>(%1%)");
+}
+
+template <class T>
+inline typename tools::promote_args<T>::type 
+   digamma(T x)
+{
+   return digamma(x, policies::policy<>());
+}
+
+} // namespace math
+} // namespace boost
+#endif
+
diff --git a/foreach.hpp b/foreach.hpp
new file mode 100644
index 0000000..b9018f8
--- /dev/null
+++ b/foreach.hpp
@@ -0,0 +1,812 @@
+///////////////////////////////////////////////////////////////////////////////
+// foreach.hpp header file
+//
+// Copyright 2004 Eric Niebler.
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// Credits:
+//  Anson Tsao        - for the initial inspiration and several good suggestions.
+//  Thorsten Ottosen  - for Boost.Range, and for suggesting a way to detect
+//                      const-qualified rvalues at compile time on VC7.1+
+//  Russell Hind      - For help porting to Borland
+//  Alisdair Meredith - For help porting to Borland
+//  Stefan Slapeta    - For help porting to Intel
+
+#ifndef BOOST_FOREACH
+
+// MS compatible compilers support #pragma once
+#if defined(_MSC_VER) && (_MSC_VER >= 1020)
+# pragma once
+#endif
+
+#include <cstddef>
+#include <utility>  // for std::pair
+
+#include <boost/config.hpp>
+#include <boost/detail/workaround.hpp>
+
+// Some compilers let us detect even const-qualified rvalues at compile-time
+#if BOOST_WORKAROUND(BOOST_MSVC, >= 1310)                                                       \
+ || (BOOST_WORKAROUND(__GNUC__, >= 4) && !defined(BOOST_INTEL))                                 \
+ || (BOOST_WORKAROUND(__GNUC__, == 3) && (__GNUC_MINOR__ >= 4) && !defined(BOOST_INTEL))
+# define BOOST_FOREACH_COMPILE_TIME_CONST_RVALUE_DETECTION
+#else
+// Some compilers allow temporaries to be bound to non-const references.
+// These compilers make it impossible to for BOOST_FOREACH to detect
+// temporaries and avoid reevaluation of the collection expression.
+# if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)                                                      \
+  || BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x564))                                     \
+  || (BOOST_WORKAROUND(BOOST_INTEL_CXX_VERSION, <= 700) && defined(_MSC_VER))                   \
+  || BOOST_WORKAROUND(__SUNPRO_CC, BOOST_TESTED_AT(0x570))                                      \
+  || BOOST_WORKAROUND(__DECCXX_VER, BOOST_TESTED_AT(60590042))
+#  define BOOST_FOREACH_NO_RVALUE_DETECTION
+# endif
+// Some compilers do not correctly implement the lvalue/rvalue conversion
+// rules of the ternary conditional operator.
+# if defined(BOOST_FOREACH_NO_RVALUE_DETECTION)                                                 \
+  || defined(BOOST_NO_SFINAE)                                                                   \
+  || BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400))                                        \
+  || BOOST_WORKAROUND(BOOST_INTEL_WIN, <= 810)                                                  \
+  || BOOST_WORKAROUND(__GNUC__, < 3)                                                            \
+  || (BOOST_WORKAROUND(__GNUC__, == 3) && (__GNUC_MINOR__ <= 2))                                \
+  || (BOOST_WORKAROUND(__GNUC__, == 3) && (__GNUC_MINOR__ <= 3) && defined(__APPLE_CC__))       \
+  || BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))                                         \
+  || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3206))
+#  define BOOST_FOREACH_NO_CONST_RVALUE_DETECTION
+# else
+#  define BOOST_FOREACH_RUN_TIME_CONST_RVALUE_DETECTION
+# endif
+#endif
+
+#include <boost/mpl/if.hpp>
+#include <boost/mpl/logical.hpp>
+#include <boost/mpl/eval_if.hpp>
+#include <boost/noncopyable.hpp>
+#include <boost/range/end.hpp>
+#include <boost/range/begin.hpp>
+#include <boost/range/result_iterator.hpp>
+#include <boost/type_traits/is_array.hpp>
+#include <boost/type_traits/is_const.hpp>
+#include <boost/type_traits/is_abstract.hpp>
+#include <boost/type_traits/is_base_and_derived.hpp>
+#include <boost/iterator/iterator_traits.hpp>
+#include <boost/utility/addressof.hpp>
+
+#ifdef BOOST_FOREACH_RUN_TIME_CONST_RVALUE_DETECTION
+# include <new>
+# include <boost/aligned_storage.hpp>
+# include <boost/utility/enable_if.hpp>
+# include <boost/type_traits/remove_const.hpp>
+#endif
+
+// This must be at global scope, hence the uglified name
+enum boost_foreach_argument_dependent_lookup_hack
+{
+    boost_foreach_argument_dependent_lookup_hack_value
+};
+
+namespace boost
+{
+
+// forward declarations for iterator_range
+template<typename T>
+class iterator_range;
+
+// forward declarations for sub_range
+template<typename T>
+class sub_range;
+
+namespace foreach
+{
+    ///////////////////////////////////////////////////////////////////////////////
+    // in_range
+    //
+    template<typename T>
+    inline std::pair<T, T> in_range(T begin, T end)
+    {
+        return std::make_pair(begin, end);
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // boost::foreach::tag
+    //
+    typedef boost_foreach_argument_dependent_lookup_hack tag;
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // boost::foreach::is_lightweight_proxy
+    //   Specialize this for user-defined collection types if they are inexpensive to copy.
+    //   This tells BOOST_FOREACH it can avoid the rvalue/lvalue detection stuff.
+    template<typename T>
+    struct is_lightweight_proxy
+      : boost::mpl::false_
+    {
+    };
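+
+    //   For example, a hypothetical non-owning view type (my_span is illustration
+    //   only, not part of this code base) could be marked as cheap to copy like this:
+    //
+    //     namespace boost { namespace foreach
+    //     {
+    //         template<typename T>
+    //         struct is_lightweight_proxy< my_span<T> >
+    //           : boost::mpl::true_
+    //         {
+    //         };
+    //     }}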
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // boost::foreach::is_noncopyable
+    //   Specialize this for user-defined collection types if they cannot be copied.
+    //   This also tells BOOST_FOREACH to avoid the rvalue/lvalue detection stuff.
+    template<typename T>
+    struct is_noncopyable
+    #if !defined(BOOST_BROKEN_IS_BASE_AND_DERIVED) && !defined(BOOST_NO_IS_ABSTRACT)
+      : boost::mpl::or_<
+            boost::is_abstract<T>
+          , boost::is_base_and_derived<boost::noncopyable, T>
+        >
+    #elif !defined(BOOST_BROKEN_IS_BASE_AND_DERIVED)
+      : boost::is_base_and_derived<boost::noncopyable, T>
+    #elif !defined(BOOST_NO_IS_ABSTRACT)
+      : boost::is_abstract<T>
+    #else
+      : boost::mpl::false_
+    #endif
+    {
+    };
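+
+    //   For example, a hypothetical collection whose copy constructor is private but
+    //   which does not derive from boost::noncopyable (my_locked_list is illustration
+    //   only) could be marked explicitly:
+    //
+    //     namespace boost { namespace foreach
+    //     {
+    //         template<>
+    //         struct is_noncopyable< my_locked_list >
+    //           : boost::mpl::true_
+    //         {
+    //         };
+    //     }}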
+
+} // namespace foreach
+
+} // namespace boost
+
+// vc6/7 needs help ordering the following overloads
+#ifdef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
+# define BOOST_FOREACH_TAG_DEFAULT ...
+#else
+# define BOOST_FOREACH_TAG_DEFAULT boost::foreach::tag
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// boost_foreach_is_lightweight_proxy
+//   Another customization point for the is_lightweight_proxy optimization,
+//   this one works on legacy compilers. Overload boost_foreach_is_lightweight_proxy
+//   at the global namespace for your type.
+template<typename T>
+inline boost::foreach::is_lightweight_proxy<T> *
+boost_foreach_is_lightweight_proxy(T *&, BOOST_FOREACH_TAG_DEFAULT) { return 0; }
+
+template<typename T>
+inline boost::mpl::true_ *
+boost_foreach_is_lightweight_proxy(std::pair<T, T> *&, boost::foreach::tag) { return 0; }
+
+template<typename T>
+inline boost::mpl::true_ *
+boost_foreach_is_lightweight_proxy(boost::iterator_range<T> *&, boost::foreach::tag) { return 0; }
+
+template<typename T>
+inline boost::mpl::true_ *
+boost_foreach_is_lightweight_proxy(boost::sub_range<T> *&, boost::foreach::tag) { return 0; }
+
+template<typename T>
+inline boost::mpl::true_ *
+boost_foreach_is_lightweight_proxy(T **&, boost::foreach::tag) { return 0; }
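+
+// For example, the same hypothetical my_span<T> view type could be marked on legacy
+// compilers with an overload at global scope (illustration only):
+//
+//   template<typename T>
+//   inline boost::mpl::true_ *
+//   boost_foreach_is_lightweight_proxy(my_span<T> *&, boost::foreach::tag) { return 0; }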
+
+///////////////////////////////////////////////////////////////////////////////
+// boost_foreach_is_noncopyable
+//   Another customization point for the is_noncopyable trait,
+//   this one works on legacy compilers. Overload boost_foreach_is_noncopyable
+//   at the global namespace for your type.
+template<typename T>
+inline boost::foreach::is_noncopyable<T> *
+boost_foreach_is_noncopyable(T *&, BOOST_FOREACH_TAG_DEFAULT) { return 0; }
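+
+// For example, the hypothetical my_locked_list from above could be marked on legacy
+// compilers like this (illustration only):
+//
+//   inline boost::mpl::true_ *
+//   boost_foreach_is_noncopyable(my_locked_list *&, boost::foreach::tag) { return 0; }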
+
+namespace boost
+{
+
+namespace foreach_detail_
+{
+
+///////////////////////////////////////////////////////////////////////////////
+// Define some utilities for assessing the properties of expressions
+//
+typedef char yes_type;
+typedef char (&no_type)[2];
+yes_type is_true(boost::mpl::true_ *);
+no_type is_true(boost::mpl::false_ *);
+
+// Extracts the desired property from the expression without evaluating it
+#define BOOST_FOREACH_PROTECT(expr)                                                             \
+    (static_cast<boost::mpl::bool_<1 == sizeof(boost::foreach_detail_::is_true(expr))> *>(0))
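+
+// Illustration of the sizeof trick behind BOOST_FOREACH_PROTECT (comment only; none
+// of this is evaluated at run time):
+//
+//   is_true((boost::mpl::true_ *)0)    // yes_type, sizeof == 1
+//   is_true((boost::mpl::false_ *)0)   // no_type,  sizeof == 2
+//   BOOST_FOREACH_PROTECT(expr)        // wraps the call in sizeof, so expr is never
+//                                      // evaluated, and yields a boost::mpl::bool_<> *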
+
+template<typename Bool1, typename Bool2>
+inline boost::mpl::and_<Bool1, Bool2> *and_(Bool1 *, Bool2 *) { return 0; }
+
+template<typename Bool1, typename Bool2, typename Bool3>
+inline boost::mpl::and_<Bool1, Bool2, Bool3> *and_(Bool1 *, Bool2 *, Bool3 *) { return 0; }
+
+template<typename Bool1, typename Bool2>
+inline boost::mpl::or_<Bool1, Bool2> *or_(Bool1 *, Bool2 *) { return 0; }
+
+template<typename Bool1, typename Bool2, typename Bool3>
+inline boost::mpl::or_<Bool1, Bool2, Bool3> *or_(Bool1 *, Bool2 *, Bool3 *) { return 0; }
+
+template<typename Bool>
+inline boost::mpl::not_<Bool> *not_(Bool *) { return 0; }
+
+template<typename T>
+inline boost::mpl::false_ *is_rvalue_(T &, int) { return 0; }
+
+template<typename T>
+inline boost::mpl::true_ *is_rvalue_(T const &, ...) { return 0; }
+
+template<typename T>
+inline boost::is_array<T> *is_array_(T const &) { return 0; }
+
+template<typename T>
+inline boost::is_const<T> *is_const_(T &) { return 0; }
+
+#ifndef BOOST_FOREACH_NO_RVALUE_DETECTION
+template<typename T>
+inline boost::mpl::true_ *is_const_(T const &) { return 0; }
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// auto_any_t/auto_any
+//  General utility for putting an object of any type into automatic storage
+struct auto_any_base
+{
+    // Objects derived from auto_any_base must evaluate to false in boolean
+    // context so that they can be declared in if() statements.
+    operator bool() const
+    {
+        return false;
+    }
+};
+
+template<typename T>
+struct auto_any : auto_any_base
+{
+    auto_any(T const &t)
+      : item(t)
+    {
+    }
+
+    // temporaries of type auto_any will be bound to const auto_any_base
+    // references, but we still want to be able to mutate the stored
+    // data, so declare it as mutable.
+    mutable T item;
+};
+
+typedef auto_any_base const &auto_any_t;
+
+template<typename T, typename C>
+inline BOOST_DEDUCED_TYPENAME boost::mpl::if_<C, T const, T>::type &auto_any_cast(auto_any_t a)
+{
+    return static_cast<auto_any<T> const &>(a).item;
+}
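+
+// A minimal illustration of how auto_any is meant to be used (std::vector is only an
+// example type; the macros below pass these objects around as auto_any_t references):
+//
+//   std::vector<int> v(3, 0);
+//   auto_any<std::vector<int>::iterator> cur(v.begin());  // iterator in automatic storage
+//   auto_any_t a = cur;                                    // seen through the base class
+//   ++auto_any_cast<std::vector<int>::iterator, boost::mpl::false_>(a);  // recover and advance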
+
+typedef boost::mpl::true_ const_;
+
+///////////////////////////////////////////////////////////////////////////////
+// type2type
+//
+template<typename T, typename C = boost::mpl::false_>
+struct type2type
+  : boost::mpl::if_<C, T const, T>
+{
+};
+
+template<typename T, typename C = boost::mpl::false_>
+struct foreach_iterator
+{
+    typedef BOOST_DEDUCED_TYPENAME boost::mpl::eval_if<
+        C
+      , range_const_iterator<T>
+      , range_iterator<T>
+    >::type type;
+};
+
+template<typename T, typename C = boost::mpl::false_>
+struct foreach_reference
+  : iterator_reference<BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type>
+{
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// encode_type
+//
+template<typename T>
+inline type2type<T> *encode_type(T &, boost::mpl::false_ *) { return 0; }
+
+template<typename T>
+inline type2type<T, const_> *encode_type(T const &, boost::mpl::true_ *) { return 0; }
+
+///////////////////////////////////////////////////////////////////////////////
+// set_false
+//
+inline bool set_false(bool &b) { return b = false; }
+
+///////////////////////////////////////////////////////////////////////////////
+// to_ptr
+//
+template<typename T>
+inline T *&to_ptr(T const &)
+{
+    static T *t = 0;
+    return t;
+}
+
+// Borland needs a little extra help with arrays
+#if BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x564))
+template<typename T,std::size_t N>
+inline T (*&to_ptr(T (&)[N]))[N]
+{
+    static T (*t)[N] = 0;
+    return t;
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// derefof
+//
+template<typename T>
+inline T &derefof(T *t)
+{
+    // This is a work-around for a compiler bug in Borland. If T* is a pointer to array type U(*)[N],
+    // then dereferencing it results in a U* instead of U(&)[N]. The cast forces the issue.
+    return reinterpret_cast<T &>(
+        *const_cast<char *>(
+            reinterpret_cast<char const volatile *>(t)
+        )
+    );
+}
+
+#ifdef BOOST_FOREACH_COMPILE_TIME_CONST_RVALUE_DETECTION
+///////////////////////////////////////////////////////////////////////////////
+// Detect at compile-time whether an expression yields an rvalue or
+// an lvalue. This is rather non-standard, but some popular compilers
+// accept it.
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// rvalue_probe
+//
+template<typename T>
+struct rvalue_probe
+{
+    struct private_type_ {};
+    // can't ever return an array by value
+    typedef BOOST_DEDUCED_TYPENAME boost::mpl::if_<
+        boost::mpl::or_<boost::is_abstract<T>, boost::is_array<T> >, private_type_, T
+    >::type value_type;
+    operator value_type();
+    operator T &() const;
+};
+
+template<typename T>
+rvalue_probe<T> const make_probe(T const &t);
+
+# define BOOST_FOREACH_IS_RVALUE(COL)                                                           \
+    boost::foreach_detail_::and_(                                                               \
+        boost::foreach_detail_::not_(boost::foreach_detail_::is_array_(COL))                    \
+      , BOOST_FOREACH_PROTECT(boost::foreach_detail_::is_rvalue_(                               \
+            (true ? boost::foreach_detail_::make_probe(COL) : (COL)), 0)))
+
+#elif defined(BOOST_FOREACH_RUN_TIME_CONST_RVALUE_DETECTION)
+///////////////////////////////////////////////////////////////////////////////
+// Detect at run-time whether an expression yields an rvalue
+// or an lvalue. This is 100% standard C++, but not all compilers
+// accept it. Also, it causes FOREACH to break when used with non-
+// copyable collection types.
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// rvalue_probe
+//
+template<typename T>
+struct rvalue_probe
+{
+    rvalue_probe(T &t, bool &b)
+      : value(t)
+      , is_rvalue(b)
+    {
+    }
+
+    struct private_type_ {};
+    // can't ever return an array or an abstract type by value
+    #ifdef BOOST_NO_IS_ABSTRACT
+    typedef BOOST_DEDUCED_TYPENAME boost::mpl::if_<
+        boost::is_array<T>, private_type_, T
+    >::type value_type;
+    #else
+    typedef BOOST_DEDUCED_TYPENAME boost::mpl::if_<
+        boost::mpl::or_<boost::is_abstract<T>, boost::is_array<T> >, private_type_, T
+    >::type value_type;
+    #endif
+    
+    operator value_type()
+    {
+        this->is_rvalue = true;
+        return this->value;
+    }
+
+    operator T &() const
+    {
+        return this->value;
+    }
+
+private:
+    T &value;
+    bool &is_rvalue;
+};
+
+template<typename T>
+rvalue_probe<T> make_probe(T &t, bool &b) { return rvalue_probe<T>(t, b); }
+
+template<typename T>
+rvalue_probe<T const> make_probe(T const &t, bool &b)  { return rvalue_probe<T const>(t, b); }
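+
+// How the probe is used (sketch only; BOOST_FOREACH_EVALUATE below wraps the collection
+// expression as "true ? make_probe(col, flag) : col"): when the conditional operator has
+// to convert the probe to the collection's value type, operator value_type() runs and
+// sets the flag to true; when the probe can instead bind as a plain reference,
+// operator T &() runs and the flag stays false.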
+
+///////////////////////////////////////////////////////////////////////////////
+// simple_variant
+//  holds either a T or a T const*
+template<typename T>
+struct simple_variant
+{
+    simple_variant(T const *t)
+      : is_rvalue(false)
+    {
+        *static_cast<T const **>(this->data.address()) = t;
+    }
+
+    simple_variant(T const &t)
+      : is_rvalue(true)
+    {
+        ::new(this->data.address()) T(t);
+    }
+
+    simple_variant(simple_variant const &that)
+      : is_rvalue(that.is_rvalue)
+    {
+        if(this->is_rvalue)
+            ::new(this->data.address()) T(*that.get());
+        else
+            *static_cast<T const **>(this->data.address()) = that.get();
+    }
+
+    ~simple_variant()
+    {
+        if(this->is_rvalue)
+            this->get()->~T();
+    }
+
+    T const *get() const
+    {
+        if(this->is_rvalue)
+            return static_cast<T const *>(this->data.address());
+        else
+            return *static_cast<T const * const *>(this->data.address());
+    }
+
+private:
+    enum size_type { size = sizeof(T) > sizeof(T*) ? sizeof(T) : sizeof(T*) };
+    simple_variant &operator =(simple_variant const &); 
+    bool const is_rvalue;
+    aligned_storage<size> data;
+};
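+
+// Illustration of how simple_variant is used by contain() further below (std::string is
+// only an example type):
+//
+//   std::string s("abc");
+//   simple_variant<std::string> lv(&s);                 // lvalue: stores just a pointer to s
+//   simple_variant<std::string> rv(std::string("x"));   // rvalue: copies the temporary into
+//                                                       // the aligned_storage member
+//   lv.get();   // points at s
+//   rv.get();   // points at the internal copy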
+
+// If the collection is an array or is noncopyable, it must be an lvalue.
+// If the collection is a lightweight proxy, treat it as an rvalue
+// BUGBUG what about a noncopyable proxy?
+template<typename LValue, typename IsProxy>
+inline BOOST_DEDUCED_TYPENAME boost::enable_if<boost::mpl::or_<LValue, IsProxy>, IsProxy>::type *
+should_copy_impl(LValue *, IsProxy *, bool *)
+{
+    return 0;
+}
+
+// Otherwise, we must determine at runtime whether it's an lvalue or rvalue
+inline bool *
+should_copy_impl(boost::mpl::false_ *, boost::mpl::false_ *, bool *is_rvalue)
+{
+    return is_rvalue;
+}
+
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// contain
+//
+template<typename T>
+inline auto_any<T> contain(T const &t, boost::mpl::true_ *) // rvalue
+{
+    return t;
+}
+
+template<typename T>
+inline auto_any<T *> contain(T &t, boost::mpl::false_ *) // lvalue
+{
+    // Cannot seem to get sunpro to handle addressof() with array types.
+    #if BOOST_WORKAROUND(__SUNPRO_CC, BOOST_TESTED_AT(0x570))
+    return &t;
+    #else
+    return boost::addressof(t);
+    #endif
+}
+
+#ifdef BOOST_FOREACH_RUN_TIME_CONST_RVALUE_DETECTION
+template<typename T>
+auto_any<simple_variant<T> >
+contain(T const &t, bool *rvalue)
+{
+    return *rvalue ? simple_variant<T>(t) : simple_variant<T>(&t);
+}
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// begin
+//
+template<typename T, typename C>
+inline auto_any<BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type>
+begin(auto_any_t col, type2type<T, C> *, boost::mpl::true_ *) // rvalue
+{
+    return boost::begin(auto_any_cast<T, C>(col));
+}
+
+template<typename T, typename C>
+inline auto_any<BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type>
+begin(auto_any_t col, type2type<T, C> *, boost::mpl::false_ *) // lvalue
+{
+    typedef BOOST_DEDUCED_TYPENAME type2type<T, C>::type type;
+    typedef BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type iterator;
+    return iterator(boost::begin(derefof(auto_any_cast<type *, boost::mpl::false_>(col))));
+}
+
+#ifdef BOOST_FOREACH_RUN_TIME_CONST_RVALUE_DETECTION
+template<typename T>
+auto_any<BOOST_DEDUCED_TYPENAME foreach_iterator<T, const_>::type>
+begin(auto_any_t col, type2type<T, const_> *, bool *)
+{
+    return boost::begin(*auto_any_cast<simple_variant<T>, boost::mpl::false_>(col).get());
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// end
+//
+template<typename T, typename C>
+inline auto_any<BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type>
+end(auto_any_t col, type2type<T, C> *, boost::mpl::true_ *) // rvalue
+{
+    return boost::end(auto_any_cast<T, C>(col));
+}
+
+template<typename T, typename C>
+inline auto_any<BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type>
+end(auto_any_t col, type2type<T, C> *, boost::mpl::false_ *) // lvalue
+{
+    typedef BOOST_DEDUCED_TYPENAME type2type<T, C>::type type;
+    typedef BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type iterator;
+    return iterator(boost::end(derefof(auto_any_cast<type *, boost::mpl::false_>(col))));
+}
+
+#ifdef BOOST_FOREACH_RUN_TIME_CONST_RVALUE_DETECTION
+template<typename T>
+auto_any<BOOST_DEDUCED_TYPENAME foreach_iterator<T, const_>::type>
+end(auto_any_t col, type2type<T, const_> *, bool *)
+{
+    return boost::end(*auto_any_cast<simple_variant<T>, boost::mpl::false_>(col).get());
+}
+#endif
+
+#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
+template<typename T, typename C>
+inline auto_any<int>
+end(auto_any_t col, type2type<T *, C> *, boost::mpl::true_ *) // null-terminated C-style strings
+{
+    return 0; // not used
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// done
+//
+template<typename T, typename C>
+inline bool done(auto_any_t cur, auto_any_t end, type2type<T, C> *)
+{
+    typedef BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type iter_t;
+    return auto_any_cast<iter_t, boost::mpl::false_>(cur) == auto_any_cast<iter_t, boost::mpl::false_>(end);
+}
+
+#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
+template<typename T, typename C>
+inline bool done(auto_any_t cur, auto_any_t, type2type<T *, C> *) // null-terminated C-style strings
+{
+    return ! *auto_any_cast<T *, boost::mpl::false_>(cur);
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// next
+//
+template<typename T, typename C>
+inline void next(auto_any_t cur, type2type<T, C> *)
+{
+    typedef BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type iter_t;
+    ++auto_any_cast<iter_t, boost::mpl::false_>(cur);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// deref
+//
+template<typename T, typename C>
+inline BOOST_DEDUCED_TYPENAME foreach_reference<T, C>::type
+deref(auto_any_t cur, type2type<T, C> *)
+{
+    typedef BOOST_DEDUCED_TYPENAME foreach_iterator<T, C>::type iter_t;
+    return *auto_any_cast<iter_t, boost::mpl::false_>(cur);
+}
+
+} // namespace foreach_detail_
+} // namespace boost
+
+// A sneaky way to get the type of the collection without evaluating the expression
+#define BOOST_FOREACH_TYPEOF(COL)                                                               \
+    (true ? 0 : boost::foreach_detail_::encode_type(COL, boost::foreach_detail_::is_const_(COL)))
+
+// returns true_* if the type is noncopyable
+#define BOOST_FOREACH_IS_NONCOPYABLE(COL)                                                       \
+    boost_foreach_is_noncopyable(                                                               \
+        boost::foreach_detail_::to_ptr(COL)                                                     \
+      , boost_foreach_argument_dependent_lookup_hack_value)
+
+// returns true_* if the type is a lightweight proxy (and is not noncopyable)
+#define BOOST_FOREACH_IS_LIGHTWEIGHT_PROXY(COL)                                                 \
+    boost::foreach_detail_::and_(                                                               \
+        boost::foreach_detail_::not_(BOOST_FOREACH_IS_NONCOPYABLE(COL))                         \
+      , boost_foreach_is_lightweight_proxy(                                                     \
+            boost::foreach_detail_::to_ptr(COL)                                                 \
+          , boost_foreach_argument_dependent_lookup_hack_value))
+
+#ifdef BOOST_FOREACH_COMPILE_TIME_CONST_RVALUE_DETECTION
+///////////////////////////////////////////////////////////////////////////////
+// R-values and const R-values supported here with zero runtime overhead
+///////////////////////////////////////////////////////////////////////////////
+
+// No variable is needed to track the rvalue-ness of the collection expression
+# define BOOST_FOREACH_PREAMBLE()                                                               \
+    /**/
+
+// Evaluate the collection expression
+# define BOOST_FOREACH_EVALUATE(COL)                                                            \
+    (COL)
+
+# define BOOST_FOREACH_SHOULD_COPY(COL)                                                         \
+    (true ? 0 : boost::foreach_detail_::or_(                                                    \
+        BOOST_FOREACH_IS_RVALUE(COL)                                                            \
+      , BOOST_FOREACH_IS_LIGHTWEIGHT_PROXY(COL)))
+
+#elif defined(BOOST_FOREACH_RUN_TIME_CONST_RVALUE_DETECTION)
+///////////////////////////////////////////////////////////////////////////////
+// R-values and const R-values supported here
+///////////////////////////////////////////////////////////////////////////////
+
+// Declare a variable to track the rvalue-ness of the collection expression
+# define BOOST_FOREACH_PREAMBLE()                                                               \
+    if (bool _foreach_is_rvalue = false) {} else
+
+// Evaluate the collection expression, and detect whether it is an lvalue or an rvalue
+# define BOOST_FOREACH_EVALUATE(COL)                                                            \
+    (true ? boost::foreach_detail_::make_probe((COL), _foreach_is_rvalue) : (COL))
+
+// The rvalue/lvalue-ness of the collection expression is determined dynamically, unless
+// the type is an array or is noncopyable or is non-const, in which case we know it's an lvalue.
+// If the type happens to be a lightweight proxy, always make a copy.
+# define BOOST_FOREACH_SHOULD_COPY(COL)                                                         \
+    (boost::foreach_detail_::should_copy_impl(                                                  \
+        true ? 0 : boost::foreach_detail_::or_(                                                 \
+            boost::foreach_detail_::is_array_(COL)                                              \
+          , BOOST_FOREACH_IS_NONCOPYABLE(COL)                                                   \
+          , boost::foreach_detail_::not_(boost::foreach_detail_::is_const_(COL)))               \
+      , true ? 0 : BOOST_FOREACH_IS_LIGHTWEIGHT_PROXY(COL)                                      \
+      , &_foreach_is_rvalue))
+
+#elif !defined(BOOST_FOREACH_NO_RVALUE_DETECTION)
+///////////////////////////////////////////////////////////////////////////////
+// R-values supported here, const R-values NOT supported here
+///////////////////////////////////////////////////////////////////////////////
+
+// No variable is needed to track the rvalue-ness of the collection expression
+# define BOOST_FOREACH_PREAMBLE()                                                               \
+    /**/
+
+// Evaluate the collection expression
+# define BOOST_FOREACH_EVALUATE(COL)                                                            \
+    (COL)
+
+// Determine whether the collection expression is an lvalue or an rvalue.
+// NOTE: this gets the answer wrong for const rvalues.
+# define BOOST_FOREACH_SHOULD_COPY(COL)                                                         \
+    (true ? 0 : boost::foreach_detail_::or_(                                                    \
+        boost::foreach_detail_::is_rvalue_((COL), 0)                                            \
+      , BOOST_FOREACH_IS_LIGHTWEIGHT_PROXY(COL)))
+
+#else
+///////////////////////////////////////////////////////////////////////////////
+// R-values NOT supported here
+///////////////////////////////////////////////////////////////////////////////
+
+// No variable is needed to track the rvalue-ness of the collection expression
+# define BOOST_FOREACH_PREAMBLE()                                                               \
+    /**/
+
+// Evaluate the collection expression
+# define BOOST_FOREACH_EVALUATE(COL)                                                            \
+    (COL)
+
+// Can't use rvalues with BOOST_FOREACH (unless they are lightweight proxies)
+# define BOOST_FOREACH_SHOULD_COPY(COL)                                                         \
+    (true ? 0 : BOOST_FOREACH_IS_LIGHTWEIGHT_PROXY(COL))
+
+#endif
+
+#define BOOST_FOREACH_CONTAIN(COL)                                                              \
+    boost::foreach_detail_::contain(                                                            \
+        BOOST_FOREACH_EVALUATE(COL)                                                             \
+      , BOOST_FOREACH_SHOULD_COPY(COL))
+
+#define BOOST_FOREACH_BEGIN(COL)                                                                \
+    boost::foreach_detail_::begin(                                                              \
+        _foreach_col                                                                            \
+      , BOOST_FOREACH_TYPEOF(COL)                                                               \
+      , BOOST_FOREACH_SHOULD_COPY(COL))
+
+#define BOOST_FOREACH_END(COL)                                                                  \
+    boost::foreach_detail_::end(                                                                \
+        _foreach_col                                                                            \
+      , BOOST_FOREACH_TYPEOF(COL)                                                               \
+      , BOOST_FOREACH_SHOULD_COPY(COL))
+
+#define BOOST_FOREACH_DONE(COL)                                                                 \
+    boost::foreach_detail_::done(                                                               \
+        _foreach_cur                                                                            \
+      , _foreach_end                                                                            \
+      , BOOST_FOREACH_TYPEOF(COL))
+
+#define BOOST_FOREACH_NEXT(COL)                                                                 \
+    boost::foreach_detail_::next(                                                               \
+        _foreach_cur                                                                            \
+      , BOOST_FOREACH_TYPEOF(COL))
+
+#define BOOST_FOREACH_DEREF(COL)                                                                \
+    boost::foreach_detail_::deref(                                                              \
+        _foreach_cur                                                                            \
+      , BOOST_FOREACH_TYPEOF(COL))
+
+///////////////////////////////////////////////////////////////////////////////
+// BOOST_FOREACH
+//
+//   For iterating over collections. Collections can be
+//   arrays, null-terminated strings, or STL containers.
+//   The loop variable can be a value or reference. For
+//   example:
+//
+//   std::list<int> int_list(/*stuff*/);
+//   BOOST_FOREACH(int &i, int_list)
+//   {
+//       /* 
+//        * loop body goes here.
+//        * i is a reference to the int in int_list.
+//        */
+//   }
+//
+//   Alternatively, you can declare the loop variable first,
+//   so you can access it after the loop finishes. Obviously,
+//   if you do it this way, then the loop variable cannot be
+//   a reference.
+//
+//   int i;
+//   BOOST_FOREACH(i, int_list)
+//       { ... }
+//
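+//   Roughly, BOOST_FOREACH(int &i, int_list) expands to the following control-flow
+//   skeleton (simplified sketch; the actual macro below delegates to the
+//   BOOST_FOREACH_* helpers, which also handle rvalue and const detection):
+//
+//     if (auto_any_t _foreach_col = contain(int_list, ...)) {} else    // holds the collection
+//     if (auto_any_t _foreach_cur = begin(_foreach_col, ...)) {} else  // holds the begin iterator
+//     if (auto_any_t _foreach_end = end(_foreach_col, ...)) {} else    // holds the end iterator
+//     for (bool _foreach_continue = true;
+//               _foreach_continue && !done(_foreach_cur, _foreach_end, ...);
+//               _foreach_continue ? next(_foreach_cur, ...) : (void)0)
+//         if (set_false(_foreach_continue)) {} else
+//         for (int &i = deref(_foreach_cur, ...); !_foreach_continue; _foreach_continue = true)
+//             { /* loop body */ }
+//
+//   Each "if (... = ...) {} else" declares a variable whose lifetime spans the rest of
+//   the statement; auto_any_base converts to false, so the else branch always runs.
+//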
+#define BOOST_FOREACH(VAR, COL)                                                                 \
+    BOOST_FOREACH_PREAMBLE()                                                                    \
+    if (boost::foreach_detail_::auto_any_t _foreach_col = BOOST_FOREACH_CONTAIN(COL)) {} else   \
+    if (boost::foreach_detail_::auto_any_t _foreach_cur = BOOST_FOREACH_BEGIN(COL)) {} else     \
+    if (boost::foreach_detail_::auto_any_t _foreach_end = BOOST_FOREACH_END(COL)) {} else       \
+    for (bool _foreach_continue = true;                                                         \
+              _foreach_continue && !BOOST_FOREACH_DONE(COL);                                    \
+              _foreach_continue ? BOOST_FOREACH_NEXT(COL) : (void)0)                            \
+        if  (boost::foreach_detail_::set_false(_foreach_continue)) {} else                      \
+        for (VAR = BOOST_FOREACH_DEREF(COL); !_foreach_continue; _foreach_continue = true)
+
+#endif
