[med-svn] [sniffles] 02/07: New upstream version 1.0.6+ds

Afif Elghraoui afif at moszumanska.debian.org
Fri Nov 10 06:35:56 UTC 2017


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository sniffles.

commit 4cfc4c85f6bcf504adea7c5e0049ac4564f7a803
Author: Afif Elghraoui <afif at debian.org>
Date:   Fri Nov 10 01:18:27 2017 -0500

    New upstream version 1.0.6+ds
---
 CMakeLists.txt                 |   8 +-
 README.md                      |   5 +
 src/Alignment.cpp              | 233 ++++++++++++++++++++++++-------------
 src/Alignment.h                |  11 +-
 src/Genotyper/Genotyper.cpp    |  55 ++++++---
 src/Paramer.h                  |   8 +-
 src/Sniffles.cpp               |  91 +++++++++------
 src/Version.h                  |   2 +-
 src/cluster/Cluster_SVs.cpp    |  31 +++--
 src/cluster/Cluster_SVs.h      |   2 +
 src/print/BedpePrinter.cpp     |  99 +++++++++-------
 src/print/IPrinter.cpp         | 110 ++++++++++++------
 src/print/IPrinter.h           |  43 ++++---
 src/print/VCFPrinter.cpp       | 253 ++++++++++++++++++++++++++---------------
 src/sub/Breakpoint.cpp         | 112 +++++++-----------
 src/sub/Breakpoint.h           |   1 +
 src/sub/Detect_Breakpoints.cpp | 226 ++++++++++++++++++++----------------
 17 files changed, 790 insertions(+), 500 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cdf545c..b89228c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,13 +1,13 @@
 cmake_minimum_required(VERSION 2.8)
 project(Sniffles)
 
-set( SNIF_VERSION_MAJOR 1 )
-set( SNIF_VERSION_MINOR 0 )
+ set( SNIF_VERSION_MAJOR 1 )
+ set( SNIF_VERSION_MINOR 0 )
 IF(CMAKE_BUILD_TYPE STREQUAL "Debug")
     message(STATUS "Building in debug mode!")
-    set( SNIF_VERSION_BUILD 2-debug )
+    set( SNIF_VERSION_BUILD 6-debug )
 else()
-	set( SNIF_VERSION_BUILD 2 )
+	set( SNIF_VERSION_BUILD 6 )
 ENDIF()
 
 
diff --git a/README.md b/README.md
index 27c253d..6c4d975 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,11 @@ Please see our github wiki for more information (https://github.com/fritzsedlaze
 Sniffles performs best with the mappings of NGM-LR our novel long read mapping method. 
 Please see:
 https://github.com/philres/nextgenmap-lr
+
+****************************************
+## Citation:
+Please see and cite our preprint:
+http://www.biorxiv.org/content/early/2017/07/28/169557
   
 **************************************
 ## Poster & Talks:
diff --git a/src/Alignment.cpp b/src/Alignment.cpp
index 46e202d..6fcb409 100644
--- a/src/Alignment.cpp
+++ b/src/Alignment.cpp
@@ -72,9 +72,12 @@ vector<differences_str> Alignment::summarizeAlignment() {
 	vector<differences_str> events;
 	int pos = this->getPosition();
 	differences_str ev;
-	bool flag = false; // (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
+	bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
 
 	for (size_t i = 0; i < al->CigarData.size(); i++) {
+		if (flag) {
+			//	cout<<al->CigarData[i].Type<<al->CigarData[i].Length<<std::endl;
+		}
 		if (al->CigarData[i].Type == 'D') {
 			ev.position = pos;
 			ev.type = al->CigarData[i].Length; //deletion
@@ -88,28 +91,29 @@ vector<differences_str> Alignment::summarizeAlignment() {
 			pos += al->CigarData[i].Length;
 		} else if (al->CigarData[i].Type == 'N') {
 			pos += al->CigarData[i].Length;
-		}
-		/*else if (al->CigarData[i].Type == 'S' && al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
+		} else if (al->CigarData[i].Type == 'S' && al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
 			string sa;
 			al->GetTag("SA", sa);
-			if (sa.empty()) { // == no split reads!
+			uint32_t sv;
+			if ((al->GetTag("SV", sv) && sa.empty()) && (!(sv & Ns_CLIPPED) && !(sv & FULLY_EXPLAINED))) { // TODO remove last )
 				if (flag) {
 					std::cout << "Chop: " << pos << " Rname: " << this->getName() << std::endl;
 				}
 				if (pos == this->getPosition()) {
-					ev.position = pos - Parameter::Instance()->huge_ins;
+					ev.position = pos; // - Parameter::Instance()->huge_ins;
 				} else {
 					ev.position = pos;
 				}
 				ev.type = Parameter::Instance()->huge_ins * -1; //insertion: WE have to fix the length since we cannot estimate it!]
 				events.push_back(ev);
 			}
-		}*/
+		}
 	}
 
 	if (flag) {
+		std::cout << "FIRST:" << std::endl;
 		for (size_t i = 0; i < events.size(); i++) {
-			if (abs(events[i].type) > 3000) {
+			if (abs(events[i].type) > 200) {
 				cout << events[i].position << " " << events[i].type << endl;
 			}
 		}
@@ -158,8 +162,9 @@ vector<differences_str> Alignment::summarizeAlignment() {
 	}
 
 	if (flag) {
+		std::cout << "SECOND:" << std::endl;
 		for (size_t i = 0; i < events.size(); i++) {
-			if (abs(events[i].type) > 3000) {
+			if (abs(events[i].type) > 200) {
 				cout << events[i].position << " " << events[i].type << endl;
 			}
 		}
@@ -170,43 +175,44 @@ vector<differences_str> Alignment::summarizeAlignment() {
 //	comp_aln = clock();
 	size_t i = 0;
 	//to erase stretches of consecutive mismatches == N in the ref
-	int break_point = 0;
-	while (i < events.size()) {
-		if (events[i].position > max_size) {
-			while (i < events.size()) {
-				if (abs(events[events.size() - 1].type) == Parameter::Instance()->huge_ins) {
-					i++;
-				} else {
-					events.erase(events.begin() + i, events.begin() + i + 1);
-				}
-			}
-			break;
-		}
-		if (events[i].type == 0) {
-			size_t j = 1;
-			while (i + j < events.size() && ((events[i + j].position - events[i + (j - 1)].position) == 1 && events[i + j].type == 0)) {
-				j++;
-			}
-			if (j > 10) { //if stetch is at least 3 consecutive mismatches
-				events.erase(events.begin() + i, events.begin() + i + j);
-			} else {
-				i += j;
-			}
-		} else {
-			i++;
-		}
-	}
+	/*int break_point = 0;
+	 while (i < events.size()) {
+	 if (events[i].position > max_size) {
+	 while (i < events.size()) {
+	 if (abs(events[events.size() - 1].type) == Parameter::Instance()->huge_ins) {
+	 i++;
+	 } else {
+	 events.erase(events.begin() + i, events.begin() + i + 1);
+	 }
+	 }
+	 break;
+	 }
+	 if (events[i].type == 0) {
+	 size_t j = 1;
+	 while (i + j < events.size() && ((events[i + j].position - events[i + (j - 1)].position) == 1 && events[i + j].type == 0)) {
+	 j++;
+	 }
+	 if (j > 10) { //if stetch is at least 3 consecutive mismatches
+	 events.erase(events.begin() + i, events.begin() + i + j);
+	 } else {
+	 i += j;
+	 }
+	 } else {
+	 i++;
+	 }
+	 }*/
 //	Parameter::Instance()->meassure_time(comp_aln, "\t\terrase N: ");
 	if (flag) {
 		cout << "LAST:" << endl;
 		for (size_t i = 0; i < events.size(); i++) {
-			if (abs(events[i].type) > 3000) {
+			if (abs(events[i].type) > 200) {
 				cout << events[i].position << " " << events[i].type << endl;
 			}
 		}
 		cout << endl;
-	}
 
+		cout << "total: " << events.size() << endl;
+	}
 	return events;
 }
 void Alignment::computeAlignment() {
@@ -464,7 +470,8 @@ void Alignment::get_coords(aln_str tmp, int & start, int &stop) {
 }
 void Alignment::check_entries(vector<aln_str> &entries) {
 
-	bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
+	bool flag =(strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
+
 	if (flag) {
 		std::cout << "Nested? " << std::endl;
 		for (size_t i = 0; i < entries.size(); i++) {
@@ -474,16 +481,34 @@ void Alignment::check_entries(vector<aln_str> &entries) {
 			} else {
 				std::cout << "- ";
 			}
+			//sort_insert_ref(entries[i], new_entries);
 		}
 		std::cout << std::endl;
+
 	}
+
 	int chr = entries[0].RefID;
 	bool strand = entries[0].strand;
 	int strands = 1;
 	int valid = 1;
+	double read_gaps = 0;
+	double ref_gaps = 0;
+
+	int ref_size = 0;
+	int read_size = 0;
+
 	for (size_t i = 1; i < entries.size(); i++) {
-		if (entries[i].read_pos_stop - entries[i].read_pos_start > 200) {
-			valid++;
+		if (entries[i].read_pos_stop - entries[i].read_pos_start > 200) { //only consider segments > 200bp.
+			ref_size = min((int) abs((entries[i - 1].pos + entries[i - 1].length) - entries[i].pos), (int) abs(entries[i - 1].pos - (entries[i].pos + entries[i].length)));
+
+			read_size = abs(entries[i - 1].read_pos_stop - entries[i].read_pos_start);
+			if (abs(ref_size - read_size) > Parameter::Instance()->min_length) {
+				valid++;
+			}
+			if(flag){
+			cout<<"Read: "<<read_size << " Ref: "<<ref_size<< " "<<this->getName()<<std::endl;
+			}
+
 			if (chr != entries[i].RefID) {
 				return;
 			}
@@ -493,10 +518,14 @@ void Alignment::check_entries(vector<aln_str> &entries) {
 			}
 		}
 	}
+
 	if (flag) {
-		std::cout << "summary: " << strands << " " << valid << std::endl;
+		std::cout << "summary: " << strands << " " << valid << " " << std::endl;
 	}
-	if (strands < 3 || valid < 3) {
+	if (strands < 3 || valid < 2 ) { //check!
+		if (flag) {
+			std::cout << "Return" << std::endl;
+		}
 		return;
 	}
 
@@ -511,6 +540,9 @@ void Alignment::check_entries(vector<aln_str> &entries) {
 			read_dist = abs(entries[i - 1].read_pos_stop - entries[i].read_pos_start);
 		}
 
+		if (flag) {
+			std::cout << "REF DIST: " << ref_dist << " READ DIST: " << read_dist << std::endl;
+		}
 		if (abs(entries[i - 1].pos - entries[i].pos) < 100) { //inv dup:
 			aln_str tmp;
 			tmp.RefID = entries[i].RefID;
@@ -659,19 +691,21 @@ void Alignment::check_entries(vector<aln_str> &entries) {
  entries = new_entries;
  }
  }*/
+
+void Alignment::sort_insert_ref(aln_str tmp, vector<aln_str> &entries) {
+
+	for (vector<aln_str>::iterator i = entries.begin(); i != entries.end(); i++) {
+		if ((tmp.pos < (*i).pos)) { //insert before
+			entries.insert(i, tmp);
+			return;
+		}
+	}
+	entries.push_back(tmp);
+}
+
 void Alignment::sort_insert(aln_str tmp, vector<aln_str> &entries) {
 
 	for (vector<aln_str>::iterator i = entries.begin(); i != entries.end(); i++) {
-		/*	if ((tmp.read_pos_start == (*i).read_pos_start) && (tmp.pos == (*i).pos) && (tmp.strand == (*i).strand)) { //is the same! should not happen
-		 return;
-		 }
-		 if (!tmp.cigar.empty() && ((*i).read_pos_start <= tmp.read_pos_start && (*i).read_pos_stop >= tmp.read_pos_stop)) { //check for the additional introducded entries
-		 return;
-		 }
-		 if (!tmp.cigar.empty() && ((*i).read_pos_start >= tmp.read_pos_start && (*i).read_pos_stop <= tmp.read_pos_stop)) { //check for the additional introducded entries
-		 (*i) = tmp;
-		 return;
-		 }*/
 		if ((tmp.read_pos_start < (*i).read_pos_start)) { //insert before
 			entries.insert(i, tmp);
 			return;
@@ -702,6 +736,9 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 		tmp.mq = this->getMappingQual();
 		tmp.pos = (long) this->getPosition(); //+get_ref_lengths(tmp.RefID, ref);
 		tmp.strand = getStrand();
+		uint32_t sv;
+		al->GetTag("SV", sv);
+		tmp.cross_N = ((sv & Ns_CLIPPED));
 		bool flag = strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0;
 
 		get_coords(tmp, tmp.read_pos_start, tmp.read_pos_stop);
@@ -739,7 +776,8 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 				count++;
 			}
 			if (sa[i] == ';') {
-				if (tmp.mq > Parameter::Instance()->min_mq && entries.size() <= Parameter::Instance()->max_splits) {
+				//TODO: maybe check how often this happens per read!
+				if ((tmp.mq > Parameter::Instance()->min_mq || sv & FULLY_EXPLAINED) && entries.size() <= Parameter::Instance()->max_splits) {
 					//TODO: check this!
 					tmp.cigar = translate_cigar(cigar); //translates the cigar (string) to a type vector
 					get_coords(tmp, tmp.read_pos_start, tmp.read_pos_stop); //get the coords on the read.
@@ -758,6 +796,9 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 					//insert sorted:
 					includes_SV = true;
 					sort_insert(tmp, entries);
+
+					//al->GetTag("SV", sv);   <-get that involved
+
 				} else if (tmp.mq < Parameter::Instance()->min_mq) {
 					nested = false;
 				} else {					//Ignore read due to too many splits
@@ -772,7 +813,7 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 			}
 			i++;
 		}
-		if (nested && entries.size() > 2 || overlapping_segments(entries)) {
+		if (nested && (entries.size() > 2 || overlapping_segments(entries))) {
 			check_entries(entries);
 		}
 		if (flag) {
@@ -793,11 +834,11 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 double Alignment::get_scrore_ratio() {
 	uint score = -1;
 	uint subscore = -1;
-	if (al->GetTag("AS", score) && al->GetTag("XS", subscore)) {
+	if (al->GetTag("AS", score)) {
+		al->GetTag("XS", subscore);
 		if (subscore == 0) {
 			subscore = 1;
 		}
-		//cout<<'\t'<<score<<" "<<subscore<<endl;
 		return (double) score / (double) subscore;
 	}
 	return -1;
@@ -805,16 +846,9 @@ double Alignment::get_scrore_ratio() {
 bool Alignment::get_is_save() {
 	string sa;
 
-	double score = get_scrore_ratio();
-
-	/*if((al->GetTag("XA", sa) && !sa.empty())){
-	 std::cout<<this->getName()<<"XA"<<std::endl;
-	 }
-	 if( (al->GetTag("XT", sa) && !sa.empty()) ){
-	 std::cout<<this->getName()<<"XT"<<std::endl;
-	 }*/
+	double score = get_scrore_ratio(); //TODO should I use this again for bwa?
 
-	return !((al->GetTag("XA", sa) && !sa.empty()) || (al->GetTag("XT", sa) && !sa.empty()));					//|| (score == -1 || score < Parameter::Instance()->score_treshold)); //TODO: 7.5
+	return !((al->GetTag("XA", sa) && !sa.empty()) || (al->GetTag("XT", sa) && !sa.empty())) && (score == -1 || score > Parameter::Instance()->score_treshold);					//|| //TODO: 7.5
 }
 
 std::vector<CigarOp> Alignment::translate_cigar(std::string cigar) {
@@ -1046,7 +1080,7 @@ vector<str_event> Alignment::get_events_MD(int min_mis) {
 	return events;
 }
 
-vector<int> Alignment::get_avg_diff(double & dist) {
+vector<int> Alignment::get_avg_diff(double & dist, double & avg_del, double & avg_ins) {
 
 //computeAlignment();
 //cout<<alignment.first<<endl;
@@ -1061,10 +1095,15 @@ vector<int> Alignment::get_avg_diff(double & dist) {
 	PlaneSweep_slim * plane = new PlaneSweep_slim();
 	int min_tresh = 5; //reflects a 10% error rate.
 //compute the profile of differences:
+	double del = 0;
+	double ins = 0;
+	double mis = 0;
+	double length = event_aln[event_aln.size() - 1].position - event_aln[0].position;
 	for (size_t i = 0; i < event_aln.size(); i++) {
 		if (i != 0) {
 			dist += event_aln[i].position - event_aln[i - 1].position;
 		}
+
 		pair_str tmp;
 		tmp.position = -1;
 		if (event_aln[i].type == 0) {
@@ -1075,7 +1114,16 @@ vector<int> Alignment::get_avg_diff(double & dist) {
 		if (tmp.position != -1) { //check that its not the prev event!
 			mis_per_window.push_back(tmp.coverage); //store #mismatch per window each time it exceeds. (which might be every event position!)
 		}
+		if (event_aln[i].type > 0) {
+			avg_del += event_aln[i].type;
+		} else if (event_aln[i].type < 0) {
+			avg_ins += event_aln[i].type * -1;
+		}
 	}
+
+	avg_ins = avg_ins / length;
+	avg_del = avg_del / length;
+
 	dist = dist / (double) event_aln.size();
 	plane->finalyze();
 	return mis_per_window;	//total_num /num;
@@ -1083,8 +1131,13 @@ vector<int> Alignment::get_avg_diff(double & dist) {
 
 vector<str_event> Alignment::get_events_Aln() {
 
+	bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
+
 //clock_t comp_aln = clock();
 	vector<differences_str> event_aln = summarizeAlignment();
+	if (flag) {
+		std::cout << "test size: " << event_aln.size() << std::endl;
+	}
 //double time2 = Parameter::Instance()->meassure_time(comp_aln, "\tcompAln Events: ");
 
 	vector<str_event> events;
@@ -1092,7 +1145,11 @@ vector<str_event> Alignment::get_events_Aln() {
 	vector<pair_str> profile;
 //	comp_aln = clock();
 
-	bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
+	bool is_N_region = false;
+	uint32_t sv;
+	if (al->GetTag("SV", sv) && (!(sv & Ns_CLIPPED) && !(sv & FULLY_EXPLAINED))) {
+		is_N_region = true;
+	}
 
 	int noise_events = 0;
 //compute the profile of differences:
@@ -1116,10 +1173,13 @@ vector<str_event> Alignment::get_events_Aln() {
 	}
 
 //comp_aln = clock();
-	size_t stop = 0;
+	int stop = 0;
 	size_t start = 0;
-	for (size_t i = 0; i < profile.size(); i++) {
-		if (profile[i].position > event_aln[stop].position) {
+	for (size_t i = 0; i < profile.size() && stop < event_aln.size(); i++) {
+		if (flag) {
+			std::cout << "Prof: " << profile[i].position << " " << event_aln[stop].position << std::endl;
+		}
+		if (profile[i].position >= event_aln[stop].position) {
 			//find the postion:
 			size_t pos = 0;
 			while (pos < event_aln.size() && event_aln[pos].position != profile[i].position) {
@@ -1130,6 +1190,9 @@ vector<str_event> Alignment::get_events_Aln() {
 			int prev = event_aln[pos].position;
 			start = pos;
 			int prev_type = 1;
+			if (flag) {
+				std::cout << "check start" << std::endl;
+			}
 			//todo it is actually pos + type and not *type
 			while (start > 0 && (prev - event_aln[start].position) < (Parameter::Instance()->max_dist_alns)) {	//13		//} * abs(event_aln[start].type) + 1)) { //TODO I  dont like 13!??
 				prev = event_aln[start].position;
@@ -1147,7 +1210,9 @@ vector<str_event> Alignment::get_events_Aln() {
 			prev = event_aln[pos].position;
 			stop = pos;
 			prev_type = 1;
-
+			if (flag) {
+				std::cout << "check stop" << std::endl;
+			}
 			while (stop < event_aln.size() && (event_aln[stop].position - prev) < (Parameter::Instance()->max_dist_alns)) {		// * abs(event_aln[stop].type) + 1)) {
 				prev = event_aln[stop].position;
 
@@ -1158,8 +1223,9 @@ vector<str_event> Alignment::get_events_Aln() {
 				}
 				prev += prev_type;
 			}
-			stop--;
-
+			if (stop > 0) {
+				stop--;
+			}
 			int insert_max_pos = 0;
 			int insert_max = 0;
 			if (event_aln[start].type < 0) {
@@ -1173,13 +1239,13 @@ vector<str_event> Alignment::get_events_Aln() {
 			double insert = 0;
 			double del = 0;
 			double mismatch = 0;
+//			if (flag) {
+//				cout << start << " " << stop << endl;
+//			}
 			if (flag) {
-				cout << start << " " << stop << endl;
+				std::cout << "check total len: " << start << " " << stop << std::endl;
 			}
 			for (size_t k = start; k <= stop; k++) {
-				if (flag) {
-					cout << event_aln[k].position << " " << event_aln[k].type << endl;
-				}
 				if (event_aln[k].type == 0) {
 					mismatch++;
 				} else if (event_aln[k].type > 0) {
@@ -1205,6 +1271,9 @@ vector<str_event> Alignment::get_events_Aln() {
 			}
 			tmp.length = (tmp.length - event_aln[start].position);
 
+			if (flag) {
+				std::cout << "decide" << std::endl;
+			}
 			tmp.type = 0;
 			if (insert_max > Parameter::Instance()->min_length && insert > (del + del)) { //we have an insertion! //todo check || vs. &&
 				if (flag) {
@@ -1214,6 +1283,9 @@ vector<str_event> Alignment::get_events_Aln() {
 				tmp.pos = insert_max_pos;
 				tmp.type |= INS;
 				tmp.is_noise = false;
+				if (is_N_region && insert_max * Parameter::Instance()->avg_ins < Parameter::Instance()->min_length) {
+					tmp.type = 0;
+				}
 			} else if (del_max > Parameter::Instance()->min_length && (insert + insert) < del) { //deletion
 				if (flag) {
 					cout << "store DEL " << this->getName() << endl;
@@ -1221,7 +1293,10 @@ vector<str_event> Alignment::get_events_Aln() {
 				tmp.length = del_max;
 				tmp.type |= DEL;
 				tmp.is_noise = false;
-			} else if ((mismatch+del+insert)/2 > Parameter::Instance()->min_length) { //TODO
+				if (is_N_region && del_max * Parameter::Instance()->avg_del < Parameter::Instance()->min_length) {
+					tmp.type = 0;
+				}
+			} else if ((mismatch + del + insert) / 2 > Parameter::Instance()->min_length) { //TODO
 				if (flag) {
 					cout << "store Noise" << endl;
 				}
@@ -1229,8 +1304,12 @@ vector<str_event> Alignment::get_events_Aln() {
 				tmp.type |= DEL;
 				tmp.type |= INV;
 				tmp.is_noise = true;
+				if (is_N_region || ((del_max> Parameter::Instance()->min_length && insert_max > Parameter::Instance()->min_length) && (del_max/insert_max)<Parameter::Instance()->min_length)) {
+					tmp.type = 0;
+				}
 			} else {
 				//	std::cout<<"ERROR we did not set the type!"<<std::endl;
+				tmp.type = 0;
 			}
 
 			if (flag) {
diff --git a/src/Alignment.h b/src/Alignment.h
index 117cad4..38fa46d 100644
--- a/src/Alignment.h
+++ b/src/Alignment.h
@@ -25,6 +25,12 @@ const unsigned char TRA = 0x10; // hex for 0001 0000
 const unsigned char NEST =0x20; // hex for 0010 0000
 const unsigned char NA = 0x80;  // hex for 1000 0000
 
+
+//NGM: choped alns:
+const unsigned int NOT_CLIPPED = 0x0;
+const unsigned int Ns_CLIPPED = 0x1;
+const unsigned int FULLY_EXPLAINED = 0x2;
+
 using namespace BamTools;
 using namespace std;
 typedef unsigned short ushort;
@@ -52,6 +58,7 @@ struct aln_str{
 	long length;
 	int read_pos_start;
 	int read_pos_stop;
+	bool cross_N;
 };
 
 class Alignment {
@@ -69,6 +76,8 @@ private:
 	 int get_id(RefVector ref, std::string chr);
 	 vector<differences_str> summarizeAlignment();
 	 void sort_insert(aln_str tmp, vector<aln_str> &entries);
+
+	 void sort_insert_ref(aln_str tmp, vector<aln_str> &entries);
 	 void check_entries(vector<aln_str> &entries);
 	 bool overlapping_segments(vector<aln_str> entries);
 public:
@@ -127,7 +136,7 @@ public:
 	 double get_scrore_ratio();
 	 std::string get_md();
 	 double get_avg_indel_length_Cigar();
-	 vector<int> get_avg_diff(double & dist);
+	 vector<int> get_avg_diff(double & dist,double & avg_del, double & avg_len);
 
 };
 
diff --git a/src/Genotyper/Genotyper.cpp b/src/Genotyper/Genotyper.cpp
index cd5bb93..9be2a48 100644
--- a/src/Genotyper/Genotyper.cpp
+++ b/src/Genotyper/Genotyper.cpp
@@ -1,5 +1,5 @@
 /*
- * Genotyper.cpp
+ / * Genotyper.cpp
  *
  *  Created on: Mar 28, 2016
  *      Author: fsedlaze
@@ -14,23 +14,32 @@ std::string Genotyper::mod_breakpoint_vcf(char *buffer, int ref) {
 	string entry;
 	string tmp = string(buffer);
 	int pos = 0;
-	pos = tmp.find_last_of('\t');
 
-	entry = tmp.substr(0, pos);
-	entry += '\t';
+	pos = tmp.find_last_of("GT");
+	//tab
+	entry = tmp.substr(0, pos - 2);
 
-	tmp = tmp.substr(pos + 1);
+	tmp = tmp.substr(pos + 1);	// the right part is only needed:
 	pos = tmp.find_last_of(':');
 	int support = atoi(tmp.substr(pos + 1).c_str());
 	double allele = (double) support / (double) (support + ref);
+
+	if (allele < Parameter::Instance()->min_allelel_frequency) {
+		return "";
+	}
+	std::stringstream ss;
+	ss << ";AF=";
+	ss << allele;
+	ss << "\tGT:DR:DV\t";
+
 	if (allele > 0.8) {
-		entry += "1/1:";
+		ss << "1/1:";
 	} else if (allele > 0.3) {
-		entry += "0/1:";
+		ss << "0/1:";
 	} else {
-		entry += "0/0:";
+		ss << "0/0:";
 	}
-	std::stringstream ss;
+
 	ss << ref;
 	ss << ":";
 	ss << support;
@@ -49,6 +58,12 @@ std::string Genotyper::mod_breakpoint_bedpe(char *buffer, int ref) {
 
 	int pos = tmp.find_last_of('\t'); //TODO!!
 	int support = atoi(tmp.substr(pos + 1).c_str());
+	double allele = (double) support / (double) (support + ref);
+
+	if (allele < Parameter::Instance()->min_allelel_frequency) {
+		return "";
+	}
+
 	std::stringstream ss;
 	ss << ref;
 	ss << "\t";
@@ -58,8 +73,8 @@ std::string Genotyper::mod_breakpoint_bedpe(char *buffer, int ref) {
 }
 
 void Genotyper::parse_pos(char * buffer, int & pos, std::string & chr) {
-	chr="";
-	pos=-1;
+	chr = "";
+	pos = -1;
 	size_t i = 0;
 	int count = 0;
 	while (buffer[i] != '\t') {
@@ -91,7 +106,7 @@ variant_str Genotyper::get_breakpoint_vcf(char *buffer) {
 			tmp.pos = atoi(&buffer[i]);
 		}
 		if (tmp.pos2 == -1 && (count == 4 && (buffer[i - 1] == '[' || buffer[i - 1] == ']'))) {
-			parse_pos(&buffer[i - 1],tmp.pos2,tmp.chr2);
+			parse_pos(&buffer[i - 1], tmp.pos2, tmp.chr2);
 		}
 
 		if (count > 6 && strncmp(";CHR2=", &buffer[i], 6) == 0) {
@@ -166,8 +181,9 @@ void Genotyper::update_file(Breakpoint_Tree & tree, breakpoint_node *& node) {
 	char* buffer = new char[buffer_size];
 	myfile.getline(buffer, buffer_size);
 	//parse SVs breakpoints in file
-	while (!myfile.eof()) {
+	while (!myfile.eof()) { // TODO:if first -> we need to define AF!
 		if (buffer[0] != '#') {
+
 			std::string to_print;
 			// create binary tree to hold breakpoints!
 			variant_str tmp;
@@ -182,11 +198,15 @@ void Genotyper::update_file(Breakpoint_Tree & tree, breakpoint_node *& node) {
 			} else {
 				to_print = mod_breakpoint_bedpe(buffer, ref);
 			}
-			fprintf(file, "%s", to_print.c_str());
+			if (!to_print.empty()) {
+				fprintf(file, "%s", to_print.c_str());
+				fprintf(file, "%c", '\n');
+			}
 		} else {
 			fprintf(file, "%s", buffer);
+			fprintf(file, "%c", '\n');
 		}
-		fprintf(file, "%c", '\n');
+
 		myfile.getline(buffer, buffer_size);
 	}
 	myfile.close();
@@ -266,5 +286,10 @@ void Genotyper::update_SVs() {
 	read_SVs(this->tree, this->node);
 	compute_cov(this->tree, this->node);
 	update_file(this->tree, this->node);
+	cout << "Cleaning tmp files" << endl;
+	string del = "rm ";
+	del += Parameter::Instance()->tmp_file;
+	del += "ref_allele";
+	system(del.c_str());
 }
 
diff --git a/src/Paramer.h b/src/Paramer.h
index e75411f..f171eff 100644
--- a/src/Paramer.h
+++ b/src/Paramer.h
@@ -24,7 +24,7 @@ class Parameter {
 private:
 	Parameter() {
 		window_thresh=10;//TODO check!
-		version="1.0.2";
+		version="1.0.6";
 		huge_ins = 2000;//TODO check??
 	}
 	~Parameter() {
@@ -48,6 +48,9 @@ public:
 
 	double error_rate;
 	double score_treshold;
+	double min_allelel_frequency;
+	double avg_del;
+	double avg_ins;
 	//double min_num_mismatches;
 
 	int window_thresh;
@@ -61,11 +64,14 @@ public:
 	int min_grouping_support; //min num reads supporting the overlap of two SVs
 	int huge_ins;
 	int max_dist_alns;
+	int min_segment_size;
 
 //	bool splitthreader_output;
 	bool debug;
 	bool genotype;
 	bool phase;
+	bool ignore_std;
+	bool reportBND;
 
 	void set_regions(std::string reg) {
 		size_t i = 0;
diff --git a/src/Sniffles.cpp b/src/Sniffles.cpp
index b73d52f..7f7cd7f 100644
--- a/src/Sniffles.cpp
+++ b/src/Sniffles.cpp
@@ -10,6 +10,7 @@
 #include <iostream>
 #include "Paramer.h"
 #include <tclap/CmdLine.h>
+#include <unistd.h>
 #include <omp.h>
 #include "Genotyper/Genotyper.h"
 #include "realign/Realign.h"
@@ -26,11 +27,13 @@
 
 
 //TODO:
-// Make hist of position%10. Take highest and if second highest that is at least Xbp away is over Parameter -> Split
-//Upload new version. Take care about version naming!
+// strand bias??
+// I think you could make your performance on PacBio reads even better with a few modifications:
+//a.    The "snake" read that you show in Supplementary Figure 2.5 which limits calling of small inverted duplications likely results from an improperly constructed SMRTbell (see attached) that has adapters missing on one end.  The PacBio library prep reordered a few steps (years ago now) to prevent formation of these types of molecules.  So, hopefully it is no longer an issue.  Even with old data, you should be able to call small inverted duplications by counting support of ZMWs not jus [...]
+//b.    In pbsv, I use a simply mononucleotide consistency check to determine whether to cluster insertions from different reads as supporting the "same" events.  In addition to looking at the similarity of length and breakpoints, you could measure [min(Act)+min(Cct)+min(Gct)+min(Tct) / max(Act)+max(Cct)+max(Gct)+max(Tct)]  Even a lax criterion (>0.25) can avoid clustering phantom insertions (where one is say all A and the another is G+T).
 
-Parameter* Parameter::m_pInstance = NULL;
 
+Parameter* Parameter::m_pInstance = NULL;
 
 void read_parameters(int argc, char *argv[]) {
 
@@ -45,27 +48,36 @@ void read_parameters(int argc, char *argv[]) {
 	TCLAP::ValueArg<int> arg_minlength("l", "min_length", "Minimum length of SV to be reported. Default: 30", false, 30, "int");
 	TCLAP::ValueArg<int> arg_mq("q", "minmapping_qual", "Minimum Mapping Quality. Default: 20", false, 20, "int");
 	TCLAP::ValueArg<int> arg_numreads("n", "num_reads_report", "Report up to N reads that support the SV in the vcf file. Default: 0", false, 0, "int");
+	TCLAP::ValueArg<int> arg_segsize("r","min_seq_size","Discard read if non of its segment is larger then this. Default: 2kb",false,2000,"int");
 	TCLAP::ValueArg<std::string> arg_tmp_file("", "tmp_file", "path to temporary file otherwise Sniffles will use the current directory.", false, "", "string");
 	TCLAP::SwitchArg arg_genotype("", "genotype", "Enables Sniffles to compute the genotypes.", cmd, false);
 	TCLAP::SwitchArg arg_cluster("", "cluster", "Enables Sniffles to phase SVs that occur on the same reads", cmd, false);
+	TCLAP::SwitchArg arg_std("", "ignore_sd", "Ignores the sd based filtering. (default: false)", cmd, false);
+	TCLAP::SwitchArg arg_bnd("", "report_BND", "Report BND instead of Tra in vcf output. (default: false)", cmd, false);
+	TCLAP::ValueArg<int> arg_cluster_supp("", "cluster_support", "Minimum number of reads supporting clustering of SV. Default: 1", false, 1, "int");
+	TCLAP::ValueArg<float> arg_allelefreq("f", "allelefreq", "Threshold on allele frequency (0-1).", false, 0.0, "float");
 
+	cmd.add(arg_cluster_supp);
 	cmd.add(arg_numreads);
+	cmd.add(arg_segsize);
 	cmd.add(arg_tmp_file);
 	cmd.add(arg_dist);
 	cmd.add(arg_threads);
-	cmd.add(arg_bedpe);
-	cmd.add(arg_vcf);
 	cmd.add(arg_minlength);
 	cmd.add(arg_mq);
 	cmd.add(arg_splits);
+	cmd.add(arg_bedpe);
+	cmd.add(arg_vcf);
+	cmd.add(arg_allelefreq);
 	cmd.add(arg_support);
 	cmd.add(arg_bamfile);
+
 	//parse cmd:
 	cmd.parse(argc, argv);
 
 	Parameter::Instance()->debug = true;
 	Parameter::Instance()->score_treshold = 10;
-	Parameter::Instance()->read_name = " "; //just for debuging reasons!
+	Parameter::Instance()->read_name = "21_43213620_-";//21_16296949_+";//21_40181680_-";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
 	Parameter::Instance()->bam_files.push_back(arg_bamfile.getValue());
 	Parameter::Instance()->min_mq = arg_mq.getValue();
 	Parameter::Instance()->output_vcf = arg_vcf.getValue();
@@ -79,18 +91,28 @@ void read_parameters(int argc, char *argv[]) {
 	Parameter::Instance()->num_threads = arg_threads.getValue();
 	Parameter::Instance()->output_bedpe = arg_bedpe.getValue();
 	Parameter::Instance()->tmp_file = arg_tmp_file.getValue();
-	Parameter::Instance()->min_grouping_support = 1;
+	Parameter::Instance()->min_grouping_support = arg_cluster_supp.getValue();
+	Parameter::Instance()->min_allelel_frequency = arg_allelefreq.getValue();
+	Parameter::Instance()->min_segment_size = arg_segsize.getValue();
+	Parameter::Instance()->reportBND= arg_bnd.getValue();
+
+	Parameter::Instance()->ignore_std=arg_std.getValue();
+
+	if (Parameter::Instance()->min_allelel_frequency > 0) {
+		std::cerr << "Automatically enabling genotype mode" << std::endl;
+		Parameter::Instance()->genotype = true;
+	}
 
 	if (Parameter::Instance()->tmp_file.empty()) {
 		std::stringstream ss;
 		srand(time(NULL));
-		//ss<<"."; //TODO: User does not need to see this!
-		ss << rand();
+		ss << arg_bamfile.getValue();
 		ss << "_tmp";
-		Parameter::Instance()->tmp_file = ss.str();
+		Parameter::Instance()->tmp_file = ss.str(); //check if file exists! -> if yes throw the dice again
 	}
 }
 
+//some toy/test functions:
 void parse_binary() {
 	std::string tmp_name_file = Parameter::Instance()->tmp_file; // this file is created in IPrinter and stores the names and ID of SVS.
 	tmp_name_file += "Names";
@@ -168,24 +190,25 @@ void test_std() {
 	srand(time(NULL));
 	int start = rand() % 100000; /// sqrt(1/12) for ins. Plot TRA std vs. cov/support.
 	std::vector<int> positions;
-	double avg=0;
-	double num=0;
-	for (int t = 0; t < 1000; t++) {
-		for (int border = 100; border < 90001; border = border * 5) {
-			//for (int cov = 1; cov < 800; cov += 10) {
-			int cov = 100;
-			for (size_t i = 0; i < cov; i++) {
-				int pos = (rand() % border) + (start - (border / 2));
-				positions.push_back(pos);
+	double avg = 0;
+	double num = 0;
+
+	for (int border = 100; border < 9001; border = border * 10) {
+		for (int t = 0; t < 10; t++) {
+			for (int cov = 2; cov < 5; cov += 1) {
+
+				for (size_t i = 0; i < cov; i++) {
+					int pos = (rand() % border) + (start - (border / 2));
+					positions.push_back(pos);
+				}
+				avg += comp_std(positions, start) / test_comp_std_quantile(positions, start);
+				std::cout << "Cov: " << cov + 1 << " border: " << border << " STD: " << comp_std(positions, start) << std::endl; // / test_comp_std_quantile(positions, start) << std::endl;
+				positions.clear();
+				num++;
 			}
-			avg+=comp_std(positions, start) / test_comp_std_quantile(positions, start);
-			std::cout << "Cov: " << cov << " border: " << border << " STD: " << comp_std(positions, start) / test_comp_std_quantile(positions, start) << std::endl;
-			positions.clear();
-			num++;
-			//}
 		}
 	}
-	std::cout<<"AVG: "<<avg/num<<std::endl;
+	std::cout << "AVG: " << avg / num << std::endl;
 }
 
 void get_rand(int mean, int num, vector<int> & positions, int interval) {
@@ -255,14 +278,14 @@ void test_slimming() {
 		std::cout << std::endl;
 	}
 }
+
 int main(int argc, char *argv[]) {
 
 	try {
-		//test_slimming();
-	//	test_std();
-	//	exit(0);
-
-
+		//	test_slimming();
+		//	test_std();
+		//
+		//exit(0);
 		//init parameter and reads user defined parameter from command line.
 		read_parameters(argc, argv);
 
@@ -276,9 +299,7 @@ int main(int argc, char *argv[]) {
 		}
 		//init printer:
 		IPrinter * printer;
-		if (!Parameter::Instance()->ref_seq.empty()) {
-			printer = new NGMPrinter();
-		} else if (!Parameter::Instance()->output_vcf.empty()) {
+		if (!Parameter::Instance()->output_vcf.empty()) {
 			printer = new VCFPrinter();
 		} else if (!Parameter::Instance()->output_bedpe.empty()) {
 			printer = new BedpePrinter();
@@ -305,10 +326,6 @@ int main(int argc, char *argv[]) {
 			go->update_SVs();
 		}
 
-		cout << "Cleaning tmp files" << endl;
-		string del = "rm ";
-		del += Parameter::Instance()->tmp_file;
-		//system(del.c_str());
 
 	} catch (TCLAP::ArgException &e)  // catch any exceptions
 	{
diff --git a/src/Version.h b/src/Version.h
index d389390..bb36854 100644
--- a/src/Version.h
+++ b/src/Version.h
@@ -3,7 +3,7 @@
  
 #define VERSION_MAJOR "1"
 #define VERSION_MINOR "0"
-#define VERSION_BUILD "2"
+#define VERSION_BUILD "6"
 
 #endif // VERSION_H
 
diff --git a/src/cluster/Cluster_SVs.cpp b/src/cluster/Cluster_SVs.cpp
index 1ea3025..509fdd6 100644
--- a/src/cluster/Cluster_SVs.cpp
+++ b/src/cluster/Cluster_SVs.cpp
@@ -86,12 +86,12 @@ void Cluster_SVS::update_SVs(std::vector<combine_str> & ids) {
 	move += filename;
 	system(move.c_str());
 }
-void Cluster_SVS::add_id(int curr_id, int new_id, std::vector<combine_str> & ids,int subkey) {
+void Cluster_SVS::add_id(int curr_id, int new_id, std::vector<combine_str> & ids, int subkey) {
 
-	for (size_t i = 0; i < ids.size(); i++) {//check if already in the new array
+	for (size_t i = 0; i < ids.size(); i++) { //check if already in the new array
 		if (ids[i].curr_id == curr_id && ids[i].alt_id == new_id) {
 			ids[i].support++;
-			ids[i].hit=subkey;
+			ids[i].hit = subkey;
 			return;
 		}
 	}
@@ -101,23 +101,23 @@ void Cluster_SVS::add_id(int curr_id, int new_id, std::vector<combine_str> & ids
 	tmp.curr_id = curr_id;
 	tmp.alt_id = new_id; //smallest ID of SVs
 	tmp.support = 1;
-	tmp.hit=subkey;
+	tmp.hit = subkey;
 	ids.push_back(tmp);
 }
 std::string Cluster_SVS::find_id(int curr_id, std::vector<combine_str> & ids) {
-	std::stringstream ss ;
+	std::stringstream ss;
 	for (size_t i = 0; i < ids.size(); i++) {
 		if (ids[i].support > Parameter::Instance()->min_grouping_support) {
 			if (ids[i].curr_id == curr_id) {
 
-				ss<<ids[i].alt_id;
-				ss<<'_';
-				ss<<ids[i].hit;
+				ss << ids[i].alt_id;
+				ss << '_';
+				ss << ids[i].hit;
 				return ss.str();
 			}
 		}
 	}
-	ss<<curr_id;
+	ss << curr_id;
 	return ss.str();
 }
 void Cluster_SVS::update_SVs() {
@@ -138,11 +138,11 @@ void Cluster_SVS::update_SVs() {
 				min_id = std::min(min_id, (*i).second[j]);
 			}
 			//min_id is now the smallest SVs id that this read is associated with.
-			int subkey=0;
+			int subkey = 0;
 			for (size_t j = 0; j < (*i).second.size(); j++) { // update the other SVs IDs
 				//if ((*i).second[j] != min_id) {
-					add_id((*i).second[j], min_id, ids,subkey);
-					subkey++;
+				add_id((*i).second[j], min_id, ids, subkey);
+				subkey++;
 				//}
 			}
 		}
@@ -151,4 +151,11 @@ void Cluster_SVS::update_SVs() {
 
 //3: Update the IDS in the VCF/Bedpe files.
 	update_SVs(ids);
+	std::string tmp_name_file = Parameter::Instance()->tmp_file; // this file is created in IPrinter and stores the names and ID of SVS.
+	tmp_name_file += "Names";
+	std::cout << "Cleaning tmp files" << std::endl;
+	std::string del = "rm ";
+	del += tmp_name_file;
+	system(del.c_str());
+
 }
diff --git a/src/cluster/Cluster_SVs.h b/src/cluster/Cluster_SVs.h
index 312dc96..4f936d7 100644
--- a/src/cluster/Cluster_SVs.h
+++ b/src/cluster/Cluster_SVs.h
@@ -14,6 +14,8 @@
 #include <map>
 #include <sstream>
 #include "../Paramer.h"
+#include <string.h>
+#include <string>
 struct __attribute__((packed)) name_str{
 	long read_name; //needs to be a number to store in binary! (bit reservation!)
 	int svs_id;
diff --git a/src/print/BedpePrinter.cpp b/src/print/BedpePrinter.cpp
index 0b4821d..2de4b1f 100644
--- a/src/print/BedpePrinter.cpp
+++ b/src/print/BedpePrinter.cpp
@@ -16,49 +16,62 @@ void BedpePrinter::print_body(Breakpoint * &SV, RefVector ref) {
 		if (Parameter::Instance()->phase) {
 			store_readnames(SV->get_read_ids(), id);
 		}
+		double std_quant_start = 0;
+		double std_quant_stop = 0;
 
-		std::string chr;
-		std::string strands = SV->get_strand(2);
-		int pos = IPrinter::calc_pos(SV->get_coordinates().start.min_pos, ref, chr);
-		fprintf(file, "%s", chr.c_str());
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", pos);
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", IPrinter::calc_pos(SV->get_coordinates().start.max_pos, ref, chr));
-		fprintf(file, "%c", '\t');
-		pos = IPrinter::calc_pos(SV->get_coordinates().stop.min_pos, ref, chr);
-		fprintf(file, "%s", chr.c_str());
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", pos);
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", IPrinter::calc_pos(SV->get_coordinates().stop.max_pos, ref, chr));
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", id);
-		id++;
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", -1); //TODO: score
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%c", strands[0]);
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%c", strands[1]);
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", SV->get_support());
-		fprintf(file, "%c", '\t');
-		pos = IPrinter::calc_pos(SV->get_coordinates().start.most_support, ref, chr);
-		fprintf(file, "%s", chr.c_str());
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", pos);
-		fprintf(file, "%c", '\t');
-		pos = IPrinter::calc_pos(SV->get_coordinates().stop.most_support, ref, chr);
-		fprintf(file, "%s", chr.c_str());
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", pos);
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", SV->get_length());
-		fprintf(file, "%c", '\t');
-		fprintf(file, "%i", SV->get_support());
-		fprintf(file, "%c", '\n');
+		pair<double, double> kurtosis;
+		pair<double, double> std_quant;
+		double std_length = 0;
+
+		if (to_print(SV, std_quant, kurtosis, std_length)) {
+
+			std::string chr;
+			std::string strands = SV->get_strand(2);
+			int pos = IPrinter::calc_pos(SV->get_coordinates().start.min_pos, ref, chr);
+			fprintf(file, "%s", chr.c_str());
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", pos);
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", IPrinter::calc_pos(SV->get_coordinates().start.max_pos, ref, chr));
+			fprintf(file, "%c", '\t');
+			pos = IPrinter::calc_pos(SV->get_coordinates().stop.min_pos, ref, chr);
+			fprintf(file, "%s", chr.c_str());
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", pos);
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", IPrinter::calc_pos(SV->get_coordinates().stop.max_pos, ref, chr));
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", id);
+			id++;
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", -1); //TODO: score
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%c", strands[0]);
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%c", strands[1]);
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", SV->get_support());
+			fprintf(file, "%c", '\t');
+			pos = IPrinter::calc_pos(SV->get_coordinates().start.most_support, ref, chr);
+			fprintf(file, "%s", chr.c_str());
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", pos);
+			fprintf(file, "%c", '\t');
+			pos = IPrinter::calc_pos(SV->get_coordinates().stop.most_support, ref, chr);
+			fprintf(file, "%s", chr.c_str());
+			fprintf(file, "%c", '\t');
+			fprintf(file, "%i", pos);
+			fprintf(file, "%c", '\t');
+			if (((SV->get_SVtype() & INS) && SV->get_length() == Parameter::Instance()->huge_ins) && !SV->get_types().is_SR) {
+				fprintf(file, "%s", "NA");
+			} else {
+				fprintf(file, "%i", SV->get_length());
+			}
+			//fprintf(file, "%c", '\t');
+			//fprintf(file, "%i", SV->get_support());
+			fprintf(file, "%c", '\n');
+		}
 	}
 }
diff --git a/src/print/IPrinter.cpp b/src/print/IPrinter.cpp
index 8b72316..b3e5946 100644
--- a/src/print/IPrinter.cpp
+++ b/src/print/IPrinter.cpp
@@ -7,34 +7,51 @@
 
 #include "IPrinter.h"
 
-bool IPrinter::to_print(Breakpoint * &SV, double & std_start, double & std_stop) {
+bool IPrinter::is_huge_ins(Breakpoint * &SV) {
+	int counts=0;
+	std::map<std::string, read_str> support = SV->get_coordinates().support;
+	for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+		if(((*i).second.coordinates.second - (*i).second.coordinates.first) == Parameter::Instance()->huge_ins){
+			counts++;
+		}
+	}
+	//std::cout<<"Ratio: "<<((double)counts/(double)support.size())<<std::endl;
+	return ((double)counts/(double)support.size() > 0.3);
+}
+bool IPrinter::to_print(Breakpoint * &SV, pair<double, double>& std, pair<double, double> & kurtosis, double & std_length) {
+
+	std.first = 0;
+	std.second = 0;
 
-	std_start = 0;
-	std_stop = 0;
 	//comp_std(SV, std_start, std_stop);
-	comp_std_quantile(SV, std_start, std_stop);
+	std_length = 0;
+	kurtosis = comp_std_quantile(SV, std, std_length);
 	bool to_print = true;
 
-	if(((SV->get_SVtype() & INS) && SV->get_coordinates().stop.most_support- SV->get_coordinates().start.most_support == Parameter::Instance()->huge_ins) && (SV->get_types().is_ALN)){
-		return (!SV->get_types().is_SR && (std_start<5 && std_stop<5));
+	if ((SV->get_SVtype() & INS) && is_huge_ins(SV)) {
+		return (std.first < 5 || std.second < 5);
 	}
 
 	if ((SV->get_SVtype() & INS) || (SV->get_SVtype() & DEL)) { //for insertions  + deletions:
 		double dist = (double) (SV->get_coordinates().stop.most_support - SV->get_coordinates().start.most_support);
 		//dist = (double)std::min(((int)dist * 4), Parameter::Instance()->max_dist);
 		dist = dist * 4.0 * (uniform_variance / 2); //because we test against corrected value!
-		//std::cout<<"DIST: "<<(SV->get_coordinates().stop.most_support - SV->get_coordinates().start.most_support)<<" "<<dist<<" STD: "<<std_start<<" "<<std_stop<<std::endl;
-		return ((std_start < dist && std_stop < dist)); //0.2886751
+//		std::cout<<"DIST: "<<(SV->get_coordinates().stop.most_support - SV->get_coordinates().start.most_support)<<" "<<dist<<" STD: "<<std.first<<" "<<std.second<<std::endl;
+		return ((std.first < dist && std.second < dist)); //0.2886751
 	}
 
 	//TODO test!
 	if (SV->get_SVtype() & NEST) {
 		return true;
 	}
-	double max_allowed=4*Parameter::Instance()->max_dist*(uniform_variance / 2);
+	double max_allowed = 4 * Parameter::Instance()->max_dist * (uniform_variance / 2);
 
-	return (std_start < max_allowed && std_stop < max_allowed);
+	if (SV->get_coordinates().support.size() < 8) { //not enough coverage
+//		max_allowed = 4 * Parameter::Instance()->max_dist * (uniform_variance);
+			//max_allowed = max_allowed * ((double) SV->get_coordinates().support.size()/10.0);
+	}
 
+	return (std.first < max_allowed && std.second < max_allowed);
 }
 
 std::string IPrinter::get_chr(long pos, RefVector ref) {
@@ -62,8 +79,9 @@ long IPrinter::calc_pos(long pos, RefVector ref, std::string &chr) {
 	size_t i = 0;
 	pos -= (ref[i].RefLength + Parameter::Instance()->max_dist);
 
-	while (i < ref.size() && pos >= 0) {
+	while (i + 1 < ref.size() && pos >= 0) {
 		i++;
+		//	std::cout<<i<<" "<<ref.size()<<" "<<pos<<" "<<pos-Parameter::Instance()->max_dist<<std::endl;
 		pos -= ((long) ref[i].RefLength + (long) Parameter::Instance()->max_dist);
 	}
 	chr = ref[i].RefName;
@@ -155,54 +173,76 @@ void IPrinter::comp_std_med(Breakpoint * &SV, double & std_start, double & std_s
 	std_stop = std::sqrt(std_stop_dists[median]);
 }
 
-void IPrinter::comp_std_quantile(Breakpoint * &SV, double & std_start, double & std_stop) {
+pair<double, double> IPrinter::comp_std_quantile(Breakpoint * &SV, pair<double, double> & std, double & std_length) {
 	double count = 0;
 	std::vector<int> std_start_dists;
 	std::vector<int> std_stop_dists;
+	std::vector<int> std_length_dists;
 
-	std::stringstream ss;
-
+	//std::stringstream ss;
+	double s4_start = 0;
+	double s4_stop = 0;
+	double s2_start = 0;
+	double s2_stop = 0;
+	std_length = 0;
 	std::map<std::string, read_str> support = SV->get_coordinates().support;
-
-	fprintf(distances, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
-	fprintf(distances, "%c", '\t');
-	fprintf(distances, "%i", SV->get_coordinates().stop.most_support-SV->get_coordinates().start.most_support);
-	fprintf(distances, "%c", '\t');
-	fprintf(distances, "%i", support.size());
-	fprintf(distances, "%c", '\t');
 	for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
 		if ((*i).second.SV & SV->get_SVtype()) {
+			long diff = SV->get_length() - ((*i).second.coordinates.second - (*i).second.coordinates.first);
+			//	sort_insert(std::pow((double) diff, 2.0), std_length_dists); //TODO think about that!!
+			std_length += std::pow((double) diff, 2.0);
 			if ((*i).second.coordinates.first != -1) {
-				long diff = (SV->get_coordinates().start.most_support - (*i).second.coordinates.first);
-				ss << '\t';
-				ss << diff;
+				diff = (SV->get_coordinates().start.most_support - (*i).second.coordinates.first);
+				//ss << '\t';
+				//ss << diff;
 				sort_insert(std::pow((double) diff, 2.0), std_start_dists);
+				s4_start += std::pow((double) diff, 4.0);
+				s2_start += std::pow((double) diff, 2.0);
 			}
 			if ((*i).second.coordinates.second != -1) {
-				long diff = (SV->get_coordinates().stop.most_support - (*i).second.coordinates.second);
-				ss << '\t';
-				ss << diff;
+				diff = (SV->get_coordinates().stop.most_support - (*i).second.coordinates.second);
+				//ss << '\t';
+				//ss << diff;
 
 				sort_insert(std::pow((double) diff, 2.0), std_stop_dists);
+				s4_stop += std::pow((double) diff, 4.0);
+				s2_stop += std::pow((double) diff, 2.0);
 			}
 		}
+		count++;
 	}
+	std_length = std::sqrt(std_length / count);
+
+	s4_start = s4_start / count;
+	s4_stop = s4_stop / count;
+	s2_start = s2_start / count;
+	s2_stop = s2_stop / count;
 
 	count = 0;
-	for (int i = 0; i < std::max((int)std_stop_dists.size() / 2,8); i++) {
-		std_start += std_start_dists[i];
-		std_stop += std_stop_dists[i];
+	for (int i = 0; i < std::max((int) std_stop_dists.size() / 2, 10) && i < std_start_dists.size(); i++) {
+		std.first += std_start_dists[i];
+		std.second += std_stop_dists[i];
 		count++;
+
+		/*s4_start += std::pow((double) std_start_dists[i], 2.0);
+		 s4_stop += std::pow((double) std_stop_dists[i], 2.0);
+		 s2_start += std_start_dists[i];
+		 s2_stop += std_stop_dists[i];*/
 	}
 
-	std_start = std::sqrt(std_start / count);
-	std_stop = std::sqrt(std_stop / count);
-	fprintf(distances, "%f", std_start);
-	fprintf(distances, "%s",ss.str().c_str());
-	fprintf(distances, "%c", '\n');
+	pair<double, double> kurtosis;
+	kurtosis.first = (s4_start / std::pow(s2_start, 2.0)) - 3;
+	kurtosis.second = (s4_stop / std::pow(s2_stop, 2.0)) - 3;
+
+	std.first = std::sqrt(std.first / count);
+	std.second = std::sqrt(std.second / count);
 
+	for (size_t i = 0; i < std_length_dists.size(); i++) {
 
+	}
+	return kurtosis;
 }
+
 void IPrinter::comp_std(Breakpoint * &SV, double & std_start, double & std_stop) {
 	double count = 0;
 	std_start = 0;
diff --git a/src/print/IPrinter.h b/src/print/IPrinter.h
index d6ce613..6096a05 100644
--- a/src/print/IPrinter.h
+++ b/src/print/IPrinter.h
@@ -19,7 +19,6 @@
 
 double const uniform_variance = 0.2886751; //sqrt(1/12) see variance of uniform distribution -> std
 
-
 class IPrinter {
 protected:
 	FILE *file;
@@ -36,7 +35,8 @@ protected:
 	long calc_pos(long pos, RefVector ref, std::string &chr);
 	std::string get_chr(long pos, RefVector ref);
 	std::string get_type(char type);
-	void sort_insert(int pos,std::vector<int> & positons);
+	void sort_insert(int pos, std::vector<int> & positons);
+	bool is_huge_ins(Breakpoint * &SV);
 public:
 
 	IPrinter() {
@@ -53,32 +53,43 @@ public:
 		print_body(SV, ref);
 	}
 	void init() {
-		distances=fopen("distances.txt", "w");
-		if(!Parameter::Instance()->output_vcf.empty()){
-			file = fopen(Parameter::Instance()->output_vcf.c_str(), "w");
-		}else if(!Parameter::Instance()->output_bedpe.empty()){
-			file = fopen(Parameter::Instance()->output_bedpe.c_str(), "w");
+		try {
+			if (!Parameter::Instance()->output_vcf.empty()) {
+				file = fopen(Parameter::Instance()->output_vcf.c_str(), "w");
+			} else if (!Parameter::Instance()->output_bedpe.empty()) {
+				file = fopen(Parameter::Instance()->output_bedpe.c_str(), "w");
+			}
+		} catch (...) {
+			std::cerr << "Output file could not be created. Please check if path exists and if you have write permissions." << std::endl;
+			exit(0);
 		}
-		print_header();
+		if (file == NULL) {
+			std::cerr << "Output file could not be created. Please check if path exists and if you have write permissions." << std::endl;
+			exit(EXIT_FAILURE);
+		}
+
+
 		BamParser *mapped_file = new BamParser(Parameter::Instance()->bam_files[0]);
 		this->ref = mapped_file->get_refInfo();
-
+		print_header();
 		if (!Parameter::Instance()->ignore_regions_bed.empty()) {
 			std::cout << "Cross checking..." << std::endl;
 			initialize_bed(bed_tree, root, ref);
 		}
-		string tmp_name_file=Parameter::Instance()->tmp_file;
-		tmp_name_file+="Names";
-		tmp_file=fopen(tmp_name_file.c_str(), "wb");
+		string tmp_name_file = Parameter::Instance()->tmp_file;
+		if (Parameter::Instance()->phase) {
+			tmp_name_file += "Names";
+			tmp_file = fopen(tmp_name_file.c_str(), "wb");
+		}
 	}
-	bool to_print(Breakpoint * &SV,double & std_start,double &std_stop);
+	bool to_print(Breakpoint * &SV, pair<double, double> &std, pair<double, double> & kurtosis, double & std_length);
 	void store_readnames(std::vector<long> names, int id);
-	void close_file(){
+	void close_file() {
 		fclose(this->file);
 	}
-	void comp_std(Breakpoint * &SV,double & std_start, double & std_stop);
+	void comp_std(Breakpoint * &SV, double & std_start, double & std_stop);
 	void comp_std_med(Breakpoint * &SV, double & std_start, double & std_stop);
-	void comp_std_quantile(Breakpoint * &SV, double & std_start, double & std_stop);
+	pair<double, double> comp_std_quantile(Breakpoint * &SV, pair<double, double>& std, double & std_lenght);
 	const std::string currentDateTime();
 };
 
diff --git a/src/print/VCFPrinter.cpp b/src/print/VCFPrinter.cpp
index 1515355..6bb2769 100644
--- a/src/print/VCFPrinter.cpp
+++ b/src/print/VCFPrinter.cpp
@@ -8,61 +8,100 @@
 #include "VCFPrinter.h"
 
 void VCFPrinter::print_header() {
-
 	fprintf(file, "%s", "##fileformat=VCFv4.2\n");
 	fprintf(file, "%s", "##source=Sniffles\n");
 	string time = currentDateTime();
 	fprintf(file, "%s", "##fileDate=");
 	fprintf(file, "%s", time.c_str());
-	fprintf(file, "%s", "\n"); //TODO change!
+
+	//REport over all chrs:
+	for (size_t i = 0; i < this->ref.size(); i++) {
+		fprintf(file, "%s", "\n");
+		fprintf(file, "%s", "##contig=<ID=");
+		fprintf(file, "%s",ref[i].RefName.c_str());
+		fprintf(file, "%s", ",length=");
+		fprintf(file, "%i",(int)ref[i].RefLength);
+		fprintf(file, "%c",'>');
+	}
+
+	fprintf(file, "%s", "\n");
 	fprintf(file, "%s", "##ALT=<ID=DEL,Description=\"Deletion\">\n");
 	fprintf(file, "%s", "##ALT=<ID=DUP,Description=\"Duplication\">\n");
 	fprintf(file, "%s", "##ALT=<ID=INV,Description=\"Inversion\">\n");
-	fprintf(file, "%s", "##ALT=<ID=INVDUP,Description=\"InvertedDUP with unknown boundaries\">\n");
+	fprintf(file, "%s",
+			"##ALT=<ID=INVDUP,Description=\"InvertedDUP with unknown boundaries\">\n");
 	fprintf(file, "%s", "##ALT=<ID=TRA,Description=\"Translocation\">\n");
 	fprintf(file, "%s", "##ALT=<ID=INS,Description=\"Insertion\">\n");
-	fprintf(file, "%s", "##INFO=<ID=CHR2,Number=1,Type=String,Description=\"Chromosome for END coordinate in case of a translocation\">\n");
-	fprintf(file, "%s", "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the structural variant\">\n");
-	fprintf(file, "%s", "##INFO=<ID=MAPQ,Number=1,Type=Integer,Description=\"Median mapping quality of paired-ends\">\n");
-	fprintf(file, "%s", "##INFO=<ID=RE,Number=1,Type=Integer,Description=\"read support\">\n");
-	fprintf(file, "%s", "##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise structural variation\">\n");
-	fprintf(file, "%s", "##INFO=<ID=PRECISE,Number=0,Type=Flag,Description=\"Precise structural variation\">\n");
-	fprintf(file, "%s", "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of the SV\">\n");
-	fprintf(file, "%s", "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">\n");
-	fprintf(file, "%s", "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n");
-	fprintf(file, "%s", "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
-	fprintf(file, "%s", "##FORMAT=<ID=DR,Number=1,Type=Integer,Description=\"# high-quality reference reads\">\n");
-	fprintf(file, "%s", "##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"# high-quality variant reads\">\n");
-
-	fprintf(file, "%s", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
+	fprintf(file, "%s",
+			"##INFO=<ID=CHR2,Number=1,Type=String,Description=\"Chromosome for END coordinate in case of a translocation\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the structural variant\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=MAPQ,Number=1,Type=Integer,Description=\"Median mapping quality of paired-ends\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=RE,Number=1,Type=Integer,Description=\"read support\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise structural variation\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=PRECISE,Number=0,Type=Flag,Description=\"Precise structural variation\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of the SV\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">\n");
+	fprintf(file, "%s",
+			"##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n");
+	fprintf(file, "%s",
+				"##INFO=<ID=STD_quant_start,Number=.,Type=Integer,Description=\"STD of the start breakpoints across the reads.\">\n");
+	fprintf(file, "%s",
+				"##INFO=<ID=STD_quant_stop,Number=.,Type=Integer,Description=\"STD of the stop breakpoints across the reads.\">\n");
+	fprintf(file, "%s",
+					"##INFO=<ID=Kurtosis_quant_start,Number=.,Type=Integer,Description=\"Kurtosis value of the start breakpoints accross the reads.\">\n");
+	fprintf(file, "%s",
+						"##INFO=<ID=Kurtosis_quant_stop,Number=.,Type=Integer,Description=\"Kurtosis value of the stop breakpoints accross the reads.\">\n");
+	fprintf(file, "%s",
+				"##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"Type by which the variant is supported.(SR,ALN)\">\n");
+	fprintf(file, "%s",
+					"##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"Type by which the variant is supported.(SR,ALN)\">\n");
+	fprintf(file, "%s",
+						"##INFO=<ID=STRANDS,Number=.,Type=String,Description=\"Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)\">\n");
+	fprintf(file, "%s",
+			"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
+	fprintf(file, "%s",
+			"##FORMAT=<ID=DR,Number=1,Type=Integer,Description=\"# high-quality reference reads\">\n");
+	fprintf(file, "%s",
+			"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"# high-quality variant reads\">\n");
+
+	fprintf(file, "%s",
+			"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
 	for (size_t i = 0; i < Parameter::Instance()->bam_files.size(); i++) {
 		fprintf(file, "%c", '\t');
 		fprintf(file, "%s", Parameter::Instance()->bam_files[i].c_str());
 	}
 	fprintf(file, "%c", '\n');
+
 }
 void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
-	if (!this->bed_tree.is_in(SV->get_coordinates().start.most_support, this->root) && !this->bed_tree.is_in(SV->get_coordinates().stop.most_support, this->root)) {
+	if (!this->bed_tree.is_in(SV->get_coordinates().start.most_support,
+			this->root)
+			&& !this->bed_tree.is_in(SV->get_coordinates().stop.most_support,
+					this->root)) {
 		//temp. store read names supporting this SVs to later group the SVs together.
-		//double std_start = 0;
-		//double std_stop = 0;
-	//	double std_medstart = 0;
-		//double std_medstop = 0;
 		double std_quant_start = 0;
 		double std_quant_stop = 0;
-		//comp_std(SV, std_start, std_stop);
-		//comp_std_med(SV, std_medstart, std_medstop);
-
-
 
-		/*	if ((SV->get_SVtype() & NEST) || ((std_start < SV->get_length() * 2 && std_stop < SV->get_length() * 2) && (std_start<400 && std_stop<400))) {
-		 */
-		if (to_print(SV,std_quant_start,std_quant_stop)) {
+		pair<double, double> kurtosis;
+		pair<double, double> std_quant;
+		double std_length = 0;
+		//to_print(SV, std_quant, kurtosis, std_length);
+		bool ok_to_print = (to_print(SV, std_quant, kurtosis, std_length)
+				|| Parameter::Instance()->ignore_std);
+		if (ok_to_print) {
 			if (Parameter::Instance()->phase) {
 				store_readnames(SV->get_read_ids(), id);
 			}
 			std::string chr;
-			int start = IPrinter::calc_pos(SV->get_coordinates().start.most_support, ref, chr);
+			int start = IPrinter::calc_pos(
+					SV->get_coordinates().start.most_support, ref, chr);
 			fprintf(file, "%s", chr.c_str());
 			fprintf(file, "%c", '\t');
 			fprintf(file, "%i", start);
@@ -70,80 +109,81 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 			fprintf(file, "%i", id);
 			id++;
 
-			int end = IPrinter::calc_pos(SV->get_coordinates().stop.most_support, ref, chr);
+			int end = IPrinter::calc_pos(
+					SV->get_coordinates().stop.most_support, ref, chr);
 			std::string strands = SV->get_strand(1);
 			fprintf(file, "%s", "\tN\t");
-			/*if (SV->get_SVtype() & TRA) {	//TODO check for INV!
-			 //N[22:36765684[ +-
-			 //]21:10540232]N -+
-			 if (strands[0] == '+') {
-			 fprintf(file, "%s", "N[");
-			 fprintf(file, "%s", chr.c_str());
-			 fprintf(file, "%c", ':');
-			 fprintf(file, "%i", pos);
-
-			 } else {
-			 fprintf(file, "%c", ']');
-			 fprintf(file, "%s", chr.c_str());
-			 fprintf(file, "%c", ':');
-			 fprintf(file, "%i", pos);
-			 }
-			 if (strands[1] == '+') {
-			 fprintf(file, "%c", '[');
-			 } else {
-			 fprintf(file, "%c", ']');
-			 }
-			 if (strands[0] == '-') {
-			 fprintf(file, "%c", 'N');
-			 }
-			 fprintf(file, "%s", "\t.\tPASS\tIMPRECISE;SVMETHOD=Snifflesv");
-			 fprintf(file, "%s", Parameter::Instance()->version.c_str());
-
-			 } else {*/
-
-			fprintf(file, "%c", '<');
-			fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
-			fprintf(file, "%c", '>');
+			if (Parameter::Instance()->reportBND && (SV->get_SVtype() & TRA)) {
+				//N[22:36765684[ +-
+				//]21:10540232]N -+
+				if (strands[0] == '-' && strands[0] =='+') {
+					fprintf(file, "%s", "]");
+					fprintf(file, "%s", chr.c_str());
+					fprintf(file, "%c", ':');
+					fprintf(file, "%i", end);
+					fprintf(file, "%s", "]N");
+
+				} else {
+					fprintf(file, "%s", "N[");
+					fprintf(file, "%s", chr.c_str());
+					fprintf(file, "%c", ':');
+					fprintf(file, "%i", end);
+					fprintf(file, "%c", '[');
+				}
+			} else {
+
+				fprintf(file, "%c", '<');
+				fprintf(file, "%s",
+						IPrinter::get_type(SV->get_SVtype()).c_str());
+				fprintf(file, "%c", '>');
+			}
 
 			fprintf(file, "%s", "\t.\tPASS\t");
-			if(std_quant_start<10 && std_quant_stop<10){
+			if (std_quant.first < 10 && std_quant.second < 10) {
 				fprintf(file, "%s", "PRECISE");
-			}else{
+			} else {
 				fprintf(file, "%s", "IMPRECISE");
 			}
 
 			fprintf(file, "%s", ";SVMETHOD=Snifflesv");
 			fprintf(file, "%s", Parameter::Instance()->version.c_str());
-			fprintf(file, "%s", ";CHR2=");
-			fprintf(file, "%s", chr.c_str());
-			fprintf(file, "%s", ";END=");
-			if (SV->get_SVtype() & INS) {
-				fprintf(file, "%i", std::max((int) (end - SV->get_length()), start));
-				//	} else if (SV->get_SVtype() & NEST) {
-				//	fprintf(file, "%i", start);
-			} else {
-				fprintf(file, "%i", end);
-			}
+			if (!(Parameter::Instance()->reportBND && (SV->get_SVtype() & TRA))) {
+				fprintf(file, "%s", ";CHR2=");
+				fprintf(file, "%s", chr.c_str());
+				fprintf(file, "%s", ";END=");
 
-			/*fprintf(file, "%s", ";STD_start=");
-			fprintf(file, "%f", std_start);
-			fprintf(file, "%s", ";STD_med_start=");
-			fprintf(file, "%f", std_medstart);*/
+				if (SV->get_SVtype() & INS) {
+					fprintf(file, "%i",std::max((int) (end - SV->get_length()), start));
+				} else {
+					fprintf(file, "%i", end);
+				}
+			}
 			fprintf(file, "%s", ";STD_quant_start=");
-			fprintf(file, "%f", std_quant_start);
-			/*fprintf(file, "%s", ";STD_end=");
-			fprintf(file, "%f", std_stop);
-			fprintf(file, "%s", ";STD_med_stop=");
-			fprintf(file, "%f", std_medstop);*/
+			fprintf(file, "%f", std_quant.first);
 			fprintf(file, "%s", ";STD_quant_stop=");
-			fprintf(file, "%f", std_quant_stop);
-			//	}
+			fprintf(file, "%f", std_quant.second);
+			fprintf(file, "%s", ";Kurtosis_quant_start=");
+			fprintf(file, "%f", kurtosis.first);
+			fprintf(file, "%s", ";Kurtosis_quant_stop=");
+			fprintf(file, "%f", kurtosis.second);
+			//		fprintf(file, "%s", ";STD_length=");
+			//		fprintf(file, "%f", std_length);
+
+
 			fprintf(file, "%s", ";SVTYPE=");
-			fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
+			if (Parameter::Instance()->reportBND && (SV->get_SVtype() & TRA)  ) {
+				fprintf(file, "%s", "BND");
+			}else{
+				fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
+			}
 			if (Parameter::Instance()->report_n_reads > 0) {
-				std::vector<std::string> names = SV->get_read_names(Parameter::Instance()->report_n_reads);
+				std::vector<std::string> names = SV->get_read_names(
+						Parameter::Instance()->report_n_reads);
 				fprintf(file, "%s", ";RNAMES=");
-				for (size_t i = 0; i < names.size() && i < Parameter::Instance()->report_n_reads; i++) {
+				for (size_t i = 0;
+						i < names.size()
+								&& i < Parameter::Instance()->report_n_reads;
+						i++) {
 					fprintf(file, "%s", names[i].c_str());
 					fprintf(file, "%s", ";");
 				}
@@ -156,14 +196,47 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 			//	if (SV->get_SVtype() & NEST) {
 			//		fprintf(file, "%i", -1);
 			//	} else {
-			if(((SV->get_SVtype() & INS) && SV->get_length()== Parameter::Instance()->huge_ins) && !SV->get_types().is_SR ){
+			if (((SV->get_SVtype() & INS)
+					&& SV->get_length() == Parameter::Instance()->huge_ins)
+					&& !SV->get_types().is_SR) {
 				fprintf(file, "%s", "NA");
-			}else{
+			} else {
 				fprintf(file, "%i", SV->get_length());
 			}
 			//	}
 			fprintf(file, "%s", ";STRANDS=");
 			fprintf(file, "%s", strands.c_str());
+		/*	fprintf(file, "%s", ";STRANDS2=");
+
+			std::map<std::string, read_str> support =
+					SV->get_coordinates().support;
+			pair<int, int> tmp_start;
+			pair<int, int> tmp_stop;
+			tmp_start.first = 0;
+			tmp_start.second = 0;
+			tmp_stop.first = 0;
+			tmp_stop.second = 0;
+			for (std::map<std::string, read_str>::iterator i = support.begin();
+					i != support.end(); i++) {
+				if ((*i).second.read_strand.first) {
+					tmp_start.first++;
+				} else {
+					tmp_start.second++;
+				}
+				if ((*i).second.read_strand.second) {
+					tmp_stop.first++;
+				} else {
+					tmp_stop.second++;
+				}
+			}
+			fprintf(file, "%i", tmp_start.first);
+			fprintf(file, "%s", ",");
+			fprintf(file, "%i", tmp_start.second);
+			fprintf(file, "%s", ",");
+			fprintf(file, "%i", tmp_stop.first);
+			fprintf(file, "%s", ",");
+			fprintf(file, "%i", tmp_stop.second);*/
+
 			fprintf(file, "%s", ";RE=");
 			fprintf(file, "%i", SV->get_support());
 			fprintf(file, "%s", "\tGT:DR:DV\t./.:.:");
diff --git a/src/sub/Breakpoint.cpp b/src/sub/Breakpoint.cpp
index 416cca4..02128cb 100644
--- a/src/sub/Breakpoint.cpp
+++ b/src/sub/Breakpoint.cpp
@@ -57,10 +57,9 @@ bool Breakpoint::check_SVtype(Breakpoint * break1, Breakpoint * break2) { //todo
 		return true;
 	} else if (((SV1 & NEST) && (SV2 & NEST)) || (((SV1 & NEST) && (SV2 & INV)) || ((SV1 & INV) && (SV2 & NEST)))) {
 		return true;
+	} else if (((SV1 & DUP) && (SV2 & INS)) || ((SV1 & INS) && (SV2 & DUP))) { //DUP and ins have often the same signal for alignments.
+		return true;
 	}
-	//else if (((SV1 & DUP) && (SV2 & INS)) || ((SV1 & INS) && (SV2 & DUP))) {
-	//	return true;
-	//}
 	//std::cout<<"S1: "<<print_type(SV1)<<" S2 "<<print_type(SV2)<<" "<<(*break2->get_coordinates().support.begin()).second.type<<std::endl;
 	return (SV1 == SV2);
 }
@@ -122,7 +121,16 @@ long Breakpoint::overlap(Breakpoint * tmp) {
 
 	}
 //merging two robust calls:
+	/*if (is_same_strand(tmp) && (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < max_dist || abs(tmp->get_coordinates().stop.max_pos - positions.stop.max_pos) < max_dist)) {
+	 if (tmp->get_coordinates().stop.max_pos - tmp->get_coordinates().start.min_pos == Parameter::Instance()->huge_ins || positions.stop.max_pos - positions.start.min_pos == Parameter::Instance()->huge_ins) {
+	 if (flag) {
+	 cout << "\tHIT" << endl;
+	 }
+	 return 0;
+	 }
+	 }*/
 
+	//Standard merging.
 	if (is_same_strand(tmp) && (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < max_dist && abs(tmp->get_coordinates().stop.max_pos - positions.stop.max_pos) < max_dist)) {
 		if (flag) {
 			cout << "\tHIT" << endl;
@@ -130,9 +138,16 @@ long Breakpoint::overlap(Breakpoint * tmp) {
 		return 0;
 	}
 
+	//merging "huge ins" and observed ins:
+	if(is_same_strand(tmp) && (abs(tmp->get_coordinates().start.min_pos -tmp->get_coordinates().stop.max_pos) == Parameter::Instance()->huge_ins || abs(positions.start.min_pos -positions.stop.max_pos) == Parameter::Instance()->huge_ins) && (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < max_dist || abs(tmp->get_coordinates().stop.max_pos - positions.stop.max_pos) < max_dist)){
+		return 0;
+	}
+
+	//If there is an INVDUP:
 	if ((is_NEST(tmp, this) && is_same_strand(tmp)) && (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < Parameter::Instance()->max_dist || abs(tmp->get_coordinates().start.min_pos - positions.stop.min_pos) < Parameter::Instance()->max_dist)) {
 		return 0;
 	}
+
 //extend Split read by noisy region: //not longer needed??
 	/*	if (((tmp->get_types().is_Noise || this->get_types().is_Noise) && !(tmp->get_types().is_Noise && this->get_types().is_Noise))
 	 && (abs(tmp->get_coordinates().start.min_pos - positions.stop.min_pos) < max_dist / 2 || abs(tmp->get_coordinates().stop.max_pos - positions.start.max_pos) < max_dist / 2)) { //TODO maybe add SV type check!
@@ -156,58 +171,6 @@ long Breakpoint::overlap(Breakpoint * tmp) {
 void Breakpoint::add_read(Breakpoint * point) { //point = one read support!
 
 	if (point != NULL) {
-		/*if ((*point->get_coordinates().support.begin()).second.type == 0) {
-		 this->type.is_ALN = true;
-		 }
-		 if ((*point->get_coordinates().support.begin()).second.type == 1) {
-		 this->type.is_SR = true;
-		 }
-
-		 if ((point->get_types().is_ALN || this->get_types().is_ALN) && !(point->get_types().is_ALN && this->get_types().is_ALN)) { //just to extend a split read event
-		 this->type.is_ALN = false; //TODO trick ;)
-		 //THis is to merger noisy flanking regions:
-		 if (abs(point->get_coordinates().start.min_pos - positions.stop.min_pos) < Parameter::Instance()->max_dist / 2) { //left of the current position
-		 //start stays; stop changes
-		 this->positions.stop.min_pos = positions.stop.min_pos;
-		 this->positions.stop.max_pos = positions.stop.max_pos;
-		 //Now we make the stop positions invalid:
-		 std::map<std::string, read_str> support = this->positions.support;
-		 for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
-		 (*i).second.coordinates.second = -1;
-		 }
-		 support = point->get_coordinates().support;
-		 for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
-		 (*i).second.coordinates.second = -1;
-		 }
-		 } else if (abs(point->get_coordinates().stop.max_pos - positions.start.max_pos) < Parameter::Instance()->max_dist / 2) { //right of the current position
-		 //	cout << "right " << endl;
-		 //stop stays; start changes:
-		 this->positions.start.min_pos = positions.start.min_pos;
-		 this->positions.start.max_pos = positions.start.max_pos;
-
-		 //Now we make the start positions invalid:
-		 std::map<std::string, read_str> support = this->positions.support;
-		 for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
-		 (*i).second.coordinates.first = -1;
-		 }
-		 support = point->get_coordinates().support;
-		 for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
-		 (*i).second.coordinates.first = -1;
-		 }
-		 }
-		 } else {			// merge two events:
-		 //grow interval:
-		 //	this->positions.start.min_pos = min(this->positions.start.min_pos, point->get_coordinates().start.min_pos);
-		 //	this->positions.start.max_pos = max(this->positions.start.max_pos, point->get_coordinates().start.max_pos);
-
-		 /*	if (point->get_coordinates().support.begin()->second.SV & INS) {
-		 this->positions.stop.min_pos = this->positions.start.max_pos;
-		 this->positions.stop.max_pos = this->positions.start.max_pos;
-		 } else {
-		 this->positions.stop.min_pos = min(this->positions.stop.min_pos, point->get_coordinates().stop.min_pos);
-		 this->positions.stop.max_pos = max(this->positions.stop.max_pos, point->get_coordinates().stop.max_pos);
-		 }
-		 }*/
 		//merge the support:
 		std::map<std::string, read_str> support = point->get_coordinates().support;
 		for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
@@ -393,31 +356,36 @@ void Breakpoint::predict_SV() {
 	std::vector<long> lengths2;
 
 	for (std::map<std::string, read_str>::iterator i = positions.support.begin(); i != positions.support.end(); i++) {
-		if ((*i).second.SV & this->sv_type) { ///check type
-			//cout << "Hit" << endl;
+		if ((*i).second.SV & this->sv_type) {			// && !((*i).second.SV & INS && (*i).second.length==Parameter::Instance()->huge_ins)) { ///check type
+
 			if ((*i).second.coordinates.first != -1) {
-				if (starts.find((*i).second.coordinates.first) == starts.end()) {
-					starts[(*i).second.coordinates.first] = 1;
-				} else {
-					starts[(*i).second.coordinates.first]++;
+				if ((*i).second.length != Parameter::Instance()->huge_ins) {
+					if (starts.find((*i).second.coordinates.first) == starts.end()) {
+						starts[(*i).second.coordinates.first] = 1;
+					} else {
+						starts[(*i).second.coordinates.first]++;
+					}
 				}
 				start2.push_back((*i).second.coordinates.first);
 			}
 			if ((*i).second.coordinates.second != -1) { //TODO test
-				if (stops.find((*i).second.coordinates.second) == stops.end()) {
-					stops[(*i).second.coordinates.second] = 1;
-				} else {
-					stops[(*i).second.coordinates.second]++;
+				if ((*i).second.length != Parameter::Instance()->huge_ins) {
+					if (stops.find((*i).second.coordinates.second) == stops.end()) {
+						stops[(*i).second.coordinates.second] = 1;
+					} else {
+						stops[(*i).second.coordinates.second]++;
+					}
 				}
 				stops2.push_back((*i).second.coordinates.second);
 			}
-			if ((*i).second.SV & INS) { //check lenght for ins only!
-				//	std::cout<<"LENGTH 1st: "<<(*i).second.length<<" "<<(*i).first<<std::endl;
-				if (lengths.find((*i).second.length) == lengths.end()) {
-					lengths[(*i).second.length] = 1;
-				} else {
-
-					lengths[(*i).second.length]++;
+			if (((*i).second.SV & INS) ) { //check lenght for ins only!
+				if ((*i).second.length != Parameter::Instance()->huge_ins) {
+					if (lengths.find((*i).second.length) == lengths.end()) {
+						lengths[(*i).second.length] = 1;
+					} else {
+
+						lengths[(*i).second.length]++;
+					}
 				}
 				lengths2.push_back((*i).second.length);
 			}
diff --git a/src/sub/Breakpoint.h b/src/sub/Breakpoint.h
index 233c421..b67b568 100644
--- a/src/sub/Breakpoint.h
+++ b/src/sub/Breakpoint.h
@@ -42,6 +42,7 @@ struct read_str {
 	short type; //split reads, cigar or md string
 	//for later assessment:
 	pair<bool, bool> strand;
+	pair<bool, bool> read_strand;
 	pair<long,long> coordinates; // I could use the bin tree for that!
 	char SV; // bit vector
 	int length;
diff --git a/src/sub/Detect_Breakpoints.cpp b/src/sub/Detect_Breakpoints.cpp
index 9ff6bc0..39c8a6e 100644
--- a/src/sub/Detect_Breakpoints.cpp
+++ b/src/sub/Detect_Breakpoints.cpp
@@ -19,6 +19,7 @@ long fuck_off(long pos, RefVector ref, std::string &chr) {
 	chr = ref[i].RefName;
 	return pos + ref[i].RefLength + (long) Parameter::Instance()->max_dist;
 }
+
 void store_pos(vector<hist_str> &positions, long pos, std::string read_name) {
 	for (size_t i = 0; i < positions.size(); i++) {
 		if (abs(positions[i].position - pos) < Parameter::Instance()->min_length) {
@@ -40,7 +41,7 @@ Breakpoint * split_points(vector<std::string> names, std::map<std::string, read_
 		new_support[names[i]] = support[names[i]];
 	}
 	position_str svs;
-	svs.start.min_pos = 0;//just to initialize. Should not be needed anymore at this stage of the prog.
+	svs.start.min_pos = 0; //just to initialize. Should not be needed anymore at this stage of the prog.
 	svs.stop.max_pos = 0;
 	svs.support = new_support;
 	Breakpoint * point = new Breakpoint(svs, (*new_support.begin()).second.coordinates.second - (*new_support.begin()).second.coordinates.first);
@@ -51,39 +52,39 @@ void detect_merged_svs(position_str point, RefVector ref, vector<Breakpoint *> &
 	new_points.clear(); //just in case!
 	vector<hist_str> pos_start;
 	vector<hist_str> pos_stop;
-
 	for (std::map<std::string, read_str>::iterator i = point.support.begin(); i != point.support.end(); i++) {
-		long pos = (*i).second.coordinates.first;
-		//std::cout<<(*i).second.coordinates.first<<" "<<(*i).second.coordinates.second<<std::endl;
-		store_pos(pos_start, pos, (*i).first);
-		pos = (*i).second.coordinates.second;
-		store_pos(pos_stop, pos, (*i).first);
+		//	std::cout << (*i).second.coordinates.first << "," << (*i).second.coordinates.second << std::endl;
+		store_pos(pos_start, (*i).second.coordinates.first, (*i).first);
+		store_pos(pos_stop, (*i).second.coordinates.second, (*i).first);
 	}
 //	std::cout << "Start: " << std::endl;
 	int start_count = 0;
+	//std::cout<<"Start pos: ";
 	for (size_t i = 0; i < pos_start.size(); i++) {
+		//std::cout<<pos_start[i].hits <<",";
 		if (pos_start[i].hits > Parameter::Instance()->min_support) {
 			start_count++;
 			/*		std::string chr = "";
-			long pos = fuck_off(pos_start[i].position, ref, chr);
-			std::cout << chr << " " << pos << ":" << pos_start[i].hits << " ; ";
-			*/
+			 long pos = fuck_off(pos_start[i].position, ref, chr);
+			 std::cout << chr << " " << pos << ":" << pos_start[i].hits << " ; ";
+			 */
 		}
 	}
 //	std::cout << std::endl;
 //	std::cout << "Stop: " << std::endl;
 	int stop_count = 0;
 	for (size_t i = 0; i < pos_stop.size(); i++) {
+		//	std::cout << pos_stop[i].hits << ",";
 		if (pos_stop[i].hits > Parameter::Instance()->min_support) {
 			stop_count++;
-		/*	std::string chr = "";
-			long pos = fuck_off(pos_stop[i].position, ref, chr);
-			std::cout << chr << " " << pos << ":" << pos_stop[i].hits << " ; ";
-			*/
+			/*	std::string chr = "";
+			 long pos = fuck_off(pos_stop[i].position, ref, chr);
+			 std::cout << chr << " " << pos << ":" << pos_stop[i].hits << " ; ";
+			 */
 		}
 	}
-//	std::cout << std::endl;
-//	std::cout << std::endl;
+	//std::cout << std::endl;
+	//std::cout << std::endl;
 
 	if (stop_count > 1 || start_count > 1) {
 		std::cout << "\tprocessing merged TRA" << std::endl;
@@ -148,41 +149,32 @@ bool should_be_stored(Breakpoint *& point) {
 	point->calc_support(); // we need that before:
 	//std::cout << "Stored: " << point->get_support() << " " << point->get_length() << std::endl;
 	if (point->get_SVtype() & TRA) { // we cannot make assumptions abut support yet.
-		return (point->get_support() > 2); // this is needed as we take each chr independently and just look at the primary alignment
+		return (point->get_support() > 1); // this is needed as we take each chr independently and just look at the primary alignment
 	} else if (point->get_support() >= Parameter::Instance()->min_support) {
 		point->predict_SV();
-		return (point->get_support() >= Parameter::Instance()->min_support && point->get_length() > Parameter::Instance()->min_length);
+		/*	std::cout<<"LEN check: "<<point->get_length()
+		 if(point->get_length() > Parameter::Instance()->min_length){
+		 cout<<" T "<<std::endl;
+		 }else{
+		 cout<<" T "<<std::endl;
+		 }*/
+		return (point->get_length() > Parameter::Instance()->min_length);
 	}
 
 	return false;
 }
 void polish_points(std::vector<Breakpoint *> & points, RefVector ref) { //TODO might be usefull! but why does the tree not fully work??
 	return;
-	if (!points.empty()) {
-
-		for (size_t i = 0; i < points.size(); i++) {
-			points[i]->predict_SV();
-			points[i]->calc_support();
-		}
-
-		for (size_t i = 0; i < points.size(); i++) {
-			if (!(points[i]->get_SVtype() & TRA)) { // we cannot make assumptions abut support yet.
-				for (size_t j = 0; j < points.size(); j++) {
-					if (!(points[j]->get_SVtype() & TRA)) {
-						if ((i != j)
-								&& (abs(points[j]->get_coordinates().start.most_support - points[i]->get_coordinates().start.most_support) < Parameter::Instance()->max_dist / 2
-										&& abs(points[j]->get_coordinates().stop.most_support - points[i]->get_coordinates().stop.most_support) < Parameter::Instance()->max_dist / 2)) {
-
-							std::string chr;
-							int pos1 = fuck_off(points[j]->get_coordinates().start.most_support, ref, chr);
-							std::cout << "HIT: " << chr << " " << pos1 << " " << TRANS_type((*points[j]->get_coordinates().support.begin()).second.SV) << " OTHER: ";
-
-							pos1 = fuck_off(points[i]->get_coordinates().start.most_support, ref, chr);
-
-							std::cout << chr << " " << pos1 << " " << TRANS_type((*points[i]->get_coordinates().support.begin()).second.SV) << std::endl;
-						}
+	for (size_t i = 0; i < points.size(); i++) {
+		if (points[i]->get_SVtype() & INS && (points[i]->get_length() == Parameter::Instance()->huge_ins)) {
+			for (size_t j = 0; j < points.size(); j++) {
+				if (i != j) {
+					if (abs(points[i]->get_coordinates().start.min_pos - points[j]->get_coordinates().start.min_pos) < Parameter::Instance()->max_dist || abs(points[i]->get_coordinates().stop.max_pos - points[j]->get_coordinates().stop.max_pos) < Parameter::Instance()->max_dist) {
+						std::cout << "HIT!: " << points[j]->get_coordinates().start.min_pos << " " << points[i]->get_coordinates().start.min_pos << " " << points[j]->get_coordinates().stop.max_pos << " " << points[i]->get_coordinates().stop.max_pos << " len: " << points[j]->get_length() << " " << points[i]->get_length() << std::endl;
+						break;
 					}
 				}
+
 			}
 		}
 	}
@@ -217,14 +209,9 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 //FILE * alt_allel_reads;
 	FILE * ref_allel_reads;
 	if (Parameter::Instance()->genotype) {
-
 		std::string output = Parameter::Instance()->tmp_file.c_str();
 		output += "ref_allele";
 		ref_allel_reads = fopen(output.c_str(), "wb");
-
-//	output = Parameter::Instance()->tmp_file.c_str();
-		//output += "alt_allele";
-		//alt_allel_reads = fopen(output.c_str(), "wb");
 	}
 	Alignment * tmp_aln = mapped_file->parseRead(Parameter::Instance()->min_mq);
 	long ref_space = get_ref_lengths(tmp_aln->getRefID(), ref);
@@ -237,12 +224,13 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 			if (current_RefID != tmp_aln->getRefID()) {
 				current_RefID = tmp_aln->getRefID();
 				ref_space = get_ref_lengths(tmp_aln->getRefID(), ref);
-				std::cout << "\tSwitch Chr " << ref[tmp_aln->getRefID()].RefName << " " << ref[tmp_aln->getRefID()].RefLength << std::endl;
+				std::cout << "\tSwitch Chr " << ref[tmp_aln->getRefID()].RefName << std::endl;//" " << ref[tmp_aln->getRefID()].RefLength
 				std::vector<Breakpoint *> points;
 				//	clarify(points);
 				bst.get_breakpoints(root, points);
 				polish_points(points, ref);
 				for (int i = 0; i < points.size(); i++) {
+
 					if (should_be_stored(points[i])) {
 						//detect_merged_svs(points[i]);
 						if (points[i]->get_SVtype() & TRA) {
@@ -308,41 +296,46 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 		num_reads++;
 
 		if (num_reads % 10000 == 0) {
-			//	curr = tmp_aln->getName();
 			cout << "\t\t# Processed reads: " << num_reads << endl;
-			/*(if (curr.size() == prev.size() && strcmp(prev.c_str(), curr.c_str()) == 0) {
-			 std::cerr << "Read occurred twice with primary alignment marked. Possibly no eof recognized in bam file." << std::endl;
-			 break;
-			 }
-			 prev = curr;*/
+			//" \r";
+			//std::cout.flush();
 		}
 	}
 //filter and copy results:
 	std::cout << "Finalizing  .." << std::endl;
 	std::vector<Breakpoint *> points;
 	bst.get_breakpoints(root, points);
-	polish_points(points, ref);
+	//std::cout<<"got breakpoints"<<std::endl;
+
+	//polish_points(points, ref);
 	for (int i = 0; i < points.size(); i++) {
+		//std::cout<<"start check"<<" "<<i<<std::endl;
 		if (should_be_stored(points[i])) {
-
 			if (points[i]->get_SVtype() & TRA) {
+
 				final.insert(points[i], root_final);
+				//		std::cout<<"Done insert"<<" "<<i<<std::endl;
 			} else {
+				//		std::cout<<"start print"<<" "<<i<<std::endl;
 				printer->printSV(points[i]);
+				//		std::cout<<"Done print"<<" "<<i<<std::endl;
 			}
 		}
 	}
+	//std::cout<<"Fine"<<std::endl;
 	bst.clear(root);
 	if (Parameter::Instance()->genotype) {
 		fclose(ref_allel_reads);
 	}
 //	sweep->finalyze();
+
 	points.clear();
 	final.get_breakpoints(root_final, points);
-
+	//std::cout<<"Detect merged tra"<<std::endl;
 	size_t points_size = points.size();
 	for (size_t i = 0; i < points_size; i++) { // its not nice, but I may alter the length of the vector within the loop.
 		if (points[i]->get_SVtype() & TRA) {
+
 			vector<Breakpoint *> new_points;
 			detect_merged_svs(points[i]->get_coordinates(), ref, new_points);
 			if (!new_points.empty()) {							// I only allow for 1 split!!
@@ -351,16 +344,17 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 			}
 		}
 	}
-
+	//std::cout<<"fin up"<<std::endl;
 	for (size_t i = 0; i < points.size(); i++) {
 		if (points[i]->get_SVtype() & TRA) {
 			points[i]->calc_support();
 			points[i]->predict_SV();
 		}
-		if (points[i]->get_support() > Parameter::Instance()->min_support && points[i]->get_length() > Parameter::Instance()->min_length) {
+		if (points[i]->get_support() >= Parameter::Instance()->min_support && points[i]->get_length() > Parameter::Instance()->min_length) {
 			printer->printSV(points[i]);
 		}
 	}
+	//std::cout<<"Done"<<std::endl;
 }
 
 void add_events(Alignment *& tmp, std::vector<str_event> events, short type, long ref_space, IntervallContainer & bst, TNode *&root, long read_id) {
@@ -391,6 +385,8 @@ void add_events(Alignment *& tmp, std::vector<str_event> events, short type, lon
 		}
 		//	start.support[0].read_start.min = events[i].read_pos;
 
+		read.read_strand.first = tmp->getStrand();
+		read.read_strand.second = tmp->getStrand();
 		if (flag) {
 			std::cout << tmp->getName() << " " << tmp->getRefID() << " " << svs.start.min_pos << " " << svs.stop.max_pos << " " << svs.stop.max_pos - svs.start.min_pos << std::endl;
 		}
@@ -443,7 +439,7 @@ void add_events(Alignment *& tmp, std::vector<str_event> events, short type, lon
 void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVector ref, IntervallContainer & bst, TNode *&root, long read_id) {
 	bool flag = (strcmp(tmp->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
 
-	if (false) {
+	if (flag) {
 		cout << "SPLIT: " << std::endl;
 		for (size_t i = 0; i < events.size(); i++) {
 			std::cout << events[i].pos << " stop: " << events[i].pos + events[i].length << " " << events[i].RefID << " READ: " << events[i].read_pos_start << " " << events[i].read_pos_stop;
@@ -462,6 +458,9 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 		//read.name = tmp->getName();
 		read.type = type;
 		read.SV = 0;
+		read.read_strand.first = events[i - 1].strand;
+		read.read_strand.second = events[i].strand;
+
 		//stop.support.push_back(read);
 		if (events[i].RefID == events[i - 1].RefID) { //IF different chr -> tra
 			if (events[i - 1].strand == events[i].strand) { //IF same strand -> del/ins/dup
@@ -477,37 +476,45 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 				svs.read_start = events[i - 1].read_pos_stop; // (short) events[i - 1].read_pos_start + (short) events[i - 1].length;
 				svs.read_stop = events[i].read_pos_start;
 				if (events[i - 1].strand) {
-					//	len1 = events[i - 1].pos;
-					//	len2 = events[i].pos - events[i].length;
 					svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
 					svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
 				} else {
-					//	len1 = events[i].pos;
-					//	len2 = events[i - 1].pos - events[i - 1].length;
 					svs.start.min_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
 					svs.stop.max_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
 				}
 
 				if (flag) {
-					cout << "Debug: SV_Size: " << (svs.start.min_pos - svs.stop.max_pos) << " Ref_start: " << svs.start.min_pos - get_ref_lengths(events[i].RefID, ref) << " Ref_stop: " << svs.stop.max_pos - get_ref_lengths(events[i].RefID, ref) << " readstart: " << svs.read_start << " readstop: "
-							<< svs.read_stop << std::endl;
+					cout << "Debug: SV_Size: " << (svs.start.min_pos - svs.stop.max_pos) << " tmp: " << (svs.stop.max_pos - svs.start.min_pos) << " Ref_start: " << svs.start.min_pos - get_ref_lengths(events[i].RefID, ref) << " Ref_stop: " << svs.stop.max_pos - get_ref_lengths(events[i].RefID, ref) << " readstart: " << svs.read_start << " readstop: " << svs.read_stop << std::endl;
 				}
 
-				if ((svs.stop.max_pos - svs.start.min_pos) > -1 && ((svs.stop.max_pos - svs.start.min_pos) + (Parameter::Instance()->min_length) < (svs.read_stop - svs.read_start) && (svs.read_stop - svs.read_start) > (Parameter::Instance()->min_length * 2))) {
+				if ((svs.stop.max_pos - svs.start.min_pos) > Parameter::Instance()->min_length * -1 && ((svs.stop.max_pos - svs.start.min_pos) + (Parameter::Instance()->min_length) < (svs.read_stop - svs.read_start) && (svs.read_stop - svs.read_start) > (Parameter::Instance()->min_length * 2))) {
 					if (flag) {
 						cout << "INS: " << endl;
 					}
-					read.SV |= INS;
+
+					if (!events[i].cross_N || (double)((svs.stop.max_pos - svs.start.min_pos) + Parameter::Instance()->min_length) < ((double)(svs.read_stop - svs.read_start) * Parameter::Instance()->avg_ins)) {
+						svs.stop.max_pos += (svs.read_stop - svs.read_start); //TODO check!
+						read.SV |= INS;
+					} else {
+						read.SV |= 'n';
+					}
+
 				} else if ((svs.start.min_pos - svs.stop.max_pos) * -1 > (svs.read_stop - svs.read_start) + (Parameter::Instance()->min_length)) {
-					read.SV |= DEL;
-					if (flag) {
-						cout << "DEL" << endl;
+					//cout << "DEL1 "<<(double)(svs.start.min_pos - svs.stop.max_pos) * Parameter::Instance()->avg_del * -1.0  <<" "<<((svs.read_stop - svs.read_start) + (Parameter::Instance()->min_length)) << endl;
+					if (!events[i].cross_N || (double)(svs.start.min_pos - svs.stop.max_pos) * Parameter::Instance()->avg_del * -1.0 > (double)((svs.read_stop - svs.read_start) + (Parameter::Instance()->min_length))) {
+						read.SV |= DEL;
+						if (flag) {
+							cout << "DEL2" << endl;
+						}
+					} else {
+						read.SV |= 'n';
 					}
+
 				} else if ((svs.start.min_pos - svs.stop.max_pos) > Parameter::Instance()->min_length && (svs.read_start - svs.read_stop) < Parameter::Instance()->min_length) { //check with respect to the coords of reads!
-					read.SV |= DUP;
 					if (flag) {
 						cout << "DUP: " << endl;
 					}
+					read.SV |= DUP;
 				} else {
 					if (flag) {
 						cout << "N" << endl;
@@ -518,7 +525,9 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 
 				read.strand.first = events[i - 1].strand;
 				read.strand.second = !events[i].strand;
-				if (overlaps(events[i - 1], events[i])) {
+
+				bool is_overlapping = overlaps(events[i - 1], events[i]);
+				if (is_overlapping && (events[i - 1].length > Parameter::Instance()->min_segment_size || events[i].length > Parameter::Instance()->min_segment_size)) {
 					if (flag) {
 						std::cout << "Overlap curr: " << events[i].pos << " " << events[i].pos + events[i].length << " prev: " << events[i - 1].pos << " " << events[i - 1].pos + events[i - 1].length << " " << tmp->getName() << std::endl;
 					}
@@ -541,7 +550,7 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 					//		std::cout<<"NEST: "<<svs.start.min_pos- get_ref_lengths(events[i - 1].RefID, ref) << " "<<svs.stop.max_pos - get_ref_lengths(events[i - 1].RefID, ref)<<" "<<tmp->getName()<<std::endl;
 					//	}
 					//	svs.stop.max_pos = svs.start.min_pos + Parameter::Instance()->min_length * 2;
-				} else {
+				} else if (!is_overlapping) {
 					read.SV |= INV;
 					if (events[i - 1].strand) {
 						svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
@@ -557,6 +566,8 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 			read.strand.first = events[i - 1].strand;
 			read.strand.second = !events[i].strand;
 			if (events[i - 1].strand == events[i].strand) {
+				//check this with + - strands!!
+
 				if (events[i - 1].strand) {
 					svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
 					svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
@@ -616,6 +627,20 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 				read.coordinates.first = svs.start.min_pos;
 				read.coordinates.second = svs.stop.max_pos;
 			}
+			/*	if (read.SV & TRA) {
+			 std::cout << "SPLIT: " << TRANS_type(read.SV)<<" "<<svs.start.min_pos - get_ref_lengths(events[i].RefID, ref)<<" ";
+			 if (events[i - 1].strand) {
+			 std::cout << " +";
+			 } else {
+			 std::cout << " -";
+			 }
+			 if (events[i].strand) {
+			 std::cout << " +";
+			 } else {
+			 std::cout << " -";
+			 }
+			 std::cout <<std::endl;
+			 }*/
 			/*if(read.SV & INS){
 			 std::cout<<"Split LEN: "<<abs(read.coordinates.second-read.coordinates.first)<<" Pos: "<<read.coordinates.first-get_ref_lengths(events[i - 1].RefID, ref)<<tmp->getName()<<std::endl;
 			 }*/
@@ -630,8 +655,8 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 			Breakpoint * point = new Breakpoint(svs, abs(read.coordinates.second - read.coordinates.first));
 			//std::cout<<"split ADD: " << <<" Name: "<<tmp->getName()<<" "<< svs.start.min_pos- get_ref_lengths(events[i].RefID, ref)<<"-"<<svs.stop.max_pos- get_ref_lengths(events[i].RefID, ref)<<std::endl;
 			bst.insert(point, root);
-			//		std::cout<<"Print:"<<std::endl;
-			//		bst.print(root);
+			//	std::cout<<"Print:"<<std::endl;
+			//	bst.print(root);
 		}
 	}
 }
@@ -658,40 +683,46 @@ void estimate_parameters(std::string read_filename) {
 	vector<int> scores;
 //	std::string curr, prev = "";
 	double avg_dist = 0;
-	while (!tmp_aln->getQueryBases().empty() && num < 1000) {				//1000
+	double tot_avg_ins = 0;
+	double tot_avg_del = 0;
+	while (!tmp_aln->getQueryBases().empty() && num < 1000) {	//1000
+		//	std::cout<<"test "<<tmp_aln->getName()<<std::endl;
 		if (rand() % 100 < 20 && ((tmp_aln->getAlignment()->IsPrimaryAlignment()) && (!(tmp_aln->getAlignment()->AlignmentFlag & 0x800)))) {				//}&& tmp_aln->get_is_save()))) {
 			//1. check differences in window => min_treshold for scanning!
 			//2. get score ration without checking before hand! (above if!)
 			double dist = 0;
-			vector<int> tmp = tmp_aln->get_avg_diff(dist);
-			//std::cout<<dist<<std::endl;
+			double avg_del = 0;
+			double avg_ins = 0;
+			vector<int> tmp = tmp_aln->get_avg_diff(dist, avg_del, avg_ins);
+			tot_avg_ins += avg_ins;
+			tot_avg_del += avg_del;
+			//std::cout<<"Debug:\t"<<dist<<"\t";
 			avg_dist += dist;
+			double avg_mis = 0;
 			for (size_t i = 0; i < tmp.size(); i++) {
 				while (tmp[i] + 1 > mis_per_window.size()) { //adjust length
 					mis_per_window.push_back(0);
 				}
+				avg_mis += tmp[i];
 				mis_per_window[tmp[i]]++;
 			}
-			//avg_diffs_perwindow+=tmp_aln->get_avg_diff();
+			//	std::cout <<avg_mis/tmp.size()<<"\t";
 			//get score ratio
 			double score = round(tmp_aln->get_scrore_ratio());
-			while (score + 1 > scores.size()) {
-				scores.push_back(0);
+			//	std::cout<<score<<"\t"<<std::endl;;
+			if (score > -1) {
+				while (score + 1 > scores.size()) {
+					scores.push_back(0);
+				}
+				scores[score]++;
 			}
-			scores[score]++;
 			num++;
 		}
 
 		mapped_file->parseReadFast(Parameter::Instance()->min_mq, tmp_aln);
-		/*curr = tmp_aln->getName();
-		 if (curr.size() == prev.size() && strcmp(prev.c_str(), curr.c_str()) == 0) {
-		 std::cerr << "Read occurred twice with primary alignment marked. Possibly no eof recognized in bam file." << std::endl;
-		 break;
-		 }
-		 prev = curr;*/
 	}
 	if (num == 0) {
-		std::cerr << "No reads detected in " << Parameter::Instance()->bam_files[0] << std::endl;
+		std::cerr << "Too few reads detected in " << Parameter::Instance()->bam_files[0] << std::endl;
 		exit(1);
 	}
 	vector<int> nums;
@@ -699,17 +730,14 @@ void estimate_parameters(std::string read_filename) {
 	Parameter::Instance()->max_dist_alns = floor(avg_dist / num) / 2;
 	Parameter::Instance()->window_thresh = 50;			//25;
 	if (!mis_per_window.empty()) {
-
 		for (size_t i = 0; i < mis_per_window.size(); i++) {
-			if (mis_per_window[i] != 0) {
-				//		std::cout << i << ": " << mis_per_window[i] << std::endl;
-			}
+
 			for (size_t j = 0; j < mis_per_window[i]; j++) {
 				nums.push_back(i);
 			}
 		}
 		pos = nums.size() * 0.95; //the highest 5% cutoff
-		if (pos <= nums.size()) {
+		if (pos > 0 && pos <= nums.size()) {
 			Parameter::Instance()->window_thresh = std::max(Parameter::Instance()->window_thresh, nums[pos]); //just in case we have too clean data! :)
 		}
 		nums.clear();
@@ -722,20 +750,26 @@ void estimate_parameters(std::string read_filename) {
 	}
 	pos = nums.size() * 0.05; //the lowest 5% cuttoff
 	Parameter::Instance()->score_treshold = 2; //nums[pos]; //prev=2
+
+	Parameter::Instance()->avg_del = tot_avg_del / num;
+	Parameter::Instance()->avg_ins = tot_avg_ins / num;
+
 	std::cout << "\tMax dist between aln events: " << Parameter::Instance()->max_dist_alns << std::endl;
 	std::cout << "\tMax diff in window: " << Parameter::Instance()->window_thresh << std::endl;
 	std::cout << "\tMin score ratio: " << Parameter::Instance()->score_treshold << std::endl;
+	std::cout << "\tAvg DEL ratio: " << Parameter::Instance()->avg_del << std::endl;
+	std::cout << "\tAvg INS ratio: " << Parameter::Instance()->avg_ins << std::endl;
+
 }
 
 bool overlaps(aln_str prev, aln_str curr) {
 
 	double ratio = 0;
 	double overlap = 0;
-
 	if (prev.pos + Parameter::Instance()->min_length < curr.pos + curr.length && prev.pos + prev.length - Parameter::Instance()->min_length > curr.pos) {
 		overlap = min((curr.pos + curr.length), (prev.pos + prev.length)) - max(prev.pos, curr.pos);
 		ratio = overlap / (double) min(curr.length, prev.length);
 	}
-
+//	std::cout<<overlap<<" "<<ratio<<std::endl;
 	return (ratio > 0.4 && overlap > 200);
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/sniffles.git



More information about the debian-med-commit mailing list