[med-svn] [Git][med-team/sniffles][upstream] New upstream version 1.0.10+ds

Steffen Möller gitlab at salsa.debian.org
Mon Oct 15 18:20:26 BST 2018


Steffen Möller pushed to branch upstream at Debian Med / sniffles


Commits:
7418012d by Steffen Moeller at 2018-10-15T17:10:17Z
New upstream version 1.0.10+ds
- - - - -


21 changed files:

- + .project
- CMakeLists.txt
- README.md
- src/Alignment.cpp
- src/ArgParseOutput.h
- src/Genotyper/Genotyper.cpp
- src/Genotyper/Genotyper.h
- src/Paramer.h
- src/Parser.h
- src/Sniffles.cpp
- src/force_calling/Force_calling.cpp
- src/force_calling/VCF_parser.cpp
- src/print/BedpePrinter.cpp
- src/print/VCFPrinter.cpp
- src/sub/Breakpoint.cpp
- src/sub/Detect_Breakpoints.cpp
- src/tree/Breakpoint_Tree.cpp
- src/tree/Breakpoint_Tree.h
- src/tree/IntervallTree.cpp
- test_set/README
- + test_set/test.vcf


Changes:

=====================================
.project
=====================================
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>Sniffles</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+	</buildSpec>
+	<natures>
+	</natures>
+</projectDescription>


=====================================
CMakeLists.txt
=====================================
@@ -7,9 +7,9 @@ option(STATIC "Build static binary" OFF)
  set( SNIF_VERSION_MINOR 0 )
 IF(CMAKE_BUILD_TYPE STREQUAL "Debug")
     message(STATUS "Building in debug mode!")
-    set( SNIF_VERSION_BUILD 8-debug )
+    set( SNIF_VERSION_BUILD 10-debug )
 else()
-	set( SNIF_VERSION_BUILD 8 )
+	set( SNIF_VERSION_BUILD 10 )
 ENDIF()
 
 


=====================================
README.md
=====================================
@@ -4,6 +4,24 @@ Sniffles is a structural variation caller using third generation sequencing (Pac
 
 Please see our github wiki for more information (https://github.com/fritzsedlazeck/Sniffles/wiki)
 
+
+# How to build Sniffles
+<pre>wget https://github.com/fritzsedlazeck/Sniffles/archive/master.tar.gz -O Sniffles.tar.gz
+tar xzvf Sniffles.tar.gz
+cd Sniffles-master/
+mkdir -p build/
+cd build/
+cmake ..
+make
+
+cd ../bin/sniffles*
+./sniffles</pre>
+
+Note Mac users often have to provide parameters to the cmake command:
+<pre>cmake -D CMAKE_C_COMPILER=/opt/local/bin/gcc-mp-4.7 -D CMAKE_CXX_COMPILER=/opt/local/bin/g++-mp-4.7 .. 
+</pre>
+
+
 **************************************
 ## NGMLR
 Sniffles performs best with the mappings of NGMLR our novel long read mapping method. 
@@ -12,8 +30,8 @@ https://github.com/philres/ngmlr
 
 ****************************************
 ## Citation:
-Please see and cite our preprint:
-http://www.biorxiv.org/content/early/2017/07/28/169557
+Please see and cite our paper:
+https://www.nature.com/articles/s41592-018-0001-7
   
 **************************************
 ## Poster & Talks:


=====================================
src/Alignment.cpp
=====================================
@@ -75,7 +75,7 @@ vector<differences_str> Alignment::summarize_csstring(std::vector<indel_str> &de
 
 	vector<differences_str> events;
 	differences_str ev;
-	std::cout<<"CS: "<<cs<<std::endl;
+	std::cout << "CS: " << cs << std::endl;
 	for (size_t i = 0; i < cs.size() && pos < max_size; i++) {
 		ev.position = pos;
 		if (cs[i] == ':') { //match (can be numbers or bp!)
@@ -88,7 +88,7 @@ vector<differences_str> Alignment::summarize_csstring(std::vector<indel_str> &de
 					num++;
 				}
 			}
-			cout<<"\tMatch: "<<num<<std::endl;
+			cout << "\tMatch: " << num << std::endl;
 			pos += num;
 		} else if (cs[i] == '*') { //mismatch (ref,alt) pairs
 			//only every second char counts!
@@ -96,7 +96,7 @@ vector<differences_str> Alignment::summarize_csstring(std::vector<indel_str> &de
 			pos++;
 			i += 2;
 			ev.type = 0; //mismatch
-			cout<<"\tMiss: "<<1<<std::endl;
+			cout << "\tMiss: " << 1 << std::endl;
 		} else if (cs[i] == '-') { //del
 			//collet del seq in dels!
 			indel_str del;
@@ -108,21 +108,21 @@ vector<differences_str> Alignment::summarize_csstring(std::vector<indel_str> &de
 			dels.push_back(del);
 			pos += del.sequence.size();
 			ev.type = del.sequence.size();
-			cout<<"\tDEL: "<<del.sequence.size()<<std::endl;
+			cout << "\tDEL: " << del.sequence.size() << std::endl;
 
 		} else if (cs[i] == '+') { //ins
-			int num=0;
+			int num = 0;
 			while ((i < cs.size() && cs[i] != '+') && (cs[i] != '-' && cs[i] != '+')) {
 				i++;
 				num++;
 			}
-			ev.type =num*-1;
-			cout<<"\tINS: "<<num<<std::endl;
+			ev.type = num * -1;
+			cout << "\tINS: " << num << std::endl;
 		}
 		events.push_back(ev);
 
 	}
-	std::cout<<"end CS: "<<cs<<std::endl;
+	std::cout << "end CS: " << cs << std::endl;
 	return events;
 }
 
@@ -136,6 +136,7 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
 	if (al->CigarData[0].Type == 'S') {
 		read_pos += al->CigarData[0].Length;
 	}
+
 	for (size_t i = 0; i < al->CigarData.size(); i++) {
 		if (al->CigarData[i].Type == 'M') {
 			pos += al->CigarData[i].Length;
@@ -176,15 +177,16 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
 		}
 	}
 
-	/*if (flag) {
-	 std::cout << "FIRST:" << std::endl;
-	 for (size_t i = 0; i < events.size(); i++) {
-	 if (abs(events[i].type) > 200) {
-	 cout << events[i].position << " " << events[i].type << endl;
-	 }
-	 }
-	 cout << endl;
-	 }*/
+	//exit(0);
+	if (flag) {
+		std::cout << "FIRST:" << std::endl;
+		for (size_t i = 0; i < events.size(); i++) {
+			// if (abs(events[i].type) > 200) {
+			cout << events[i].position << " " << events[i].type << endl;
+			// }
+		}
+		cout << endl;
+	}
 
 //set ref length requ. later on:
 	this->ref_len = pos - getPosition(); //TODO compare to get_length!
@@ -676,6 +678,7 @@ bool Alignment::overlapping_segments(vector<aln_str> entries) {
 	return (entries.size() == 2 && abs(entries[0].pos - entries[1].pos) < 100);
 }
 vector<aln_str> Alignment::getSA(RefVector ref) {
+
 	string sa;
 	vector<aln_str> entries;
 	if (al->GetTag("SA", sa) && !sa.empty()) {
@@ -690,7 +693,7 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 		uint32_t sv;
 		al->GetTag("SV", sv);
 		tmp.cross_N = ((sv & Ns_CLIPPED));
-		bool flag = strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0;
+		bool flag = strcmp(getName().c_str(),"0bac61ef-7819-462b-ae3d-32c68fe580c0")==0; //Parameter::Instance()->read_name.c_str()) == 0;
 
 		get_coords(tmp, tmp.read_pos_start, tmp.read_pos_stop);
 		if (flag) {
@@ -791,8 +794,7 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 double Alignment::get_scrore_ratio() {
 	uint score = -1;
 	uint subscore = -1;
-	if (al->GetTag("AS", score)) {
-		al->GetTag("XS", subscore);
+	if (al->GetTag("AS", score) && al->GetTag("XS", subscore)) {
 		if (subscore == 0) {
 			subscore = 1;
 		}
@@ -804,6 +806,7 @@ bool Alignment::get_is_save() {
 	string sa;
 
 	double score = get_scrore_ratio(); //TODO should I use this again for bwa?
+//	cout<<score<<endl;
 
 	return !((al->GetTag("XA", sa) && !sa.empty()) || (al->GetTag("XT", sa) && !sa.empty())) && (score == -1 || score > Parameter::Instance()->score_treshold); //|| //TODO: 7.5
 }
@@ -921,8 +924,9 @@ std::string Alignment::get_md() {
 	std::string md;
 	if (al->GetTag("MD", md)) {
 		return md;
-	}else{
-		std::cerr<<"No MD string detected! Check bam file! Otherwise generate using e.g. samtools."<<std::endl;
+	} else {
+		std::cerr << "No MD string detected! Check bam file! Otherwise generate using e.g. samtools." << std::endl;
+		cout<<"MD: TEST" << this->getName()<<endl;
 		exit(0);
 	}
 	return md;
@@ -932,8 +936,8 @@ std::string Alignment::get_cs() {
 	std::string cs;
 	if (al->GetTag("cs", cs)) {
 		return cs;
-	}else{
-		std::cerr<<"No CS string detected! Check bam file!"<<std::endl;
+	} else {
+		std::cerr << "No CS string detected! Check bam file!" << std::endl;
 		exit(0);
 	}
 	return cs;
@@ -1056,6 +1060,8 @@ vector<int> Alignment::get_avg_diff(double & dist, double & avg_del, double & av
 //computeAlignment();
 //cout<<alignment.first<<endl;
 //cout<<alignment.second<<endl;
+	avg_del = 0;
+	avg_ins = 0;
 	vector<int> mis_per_window;
 	std::vector<indel_str> dels;
 	vector<differences_str> event_aln = summarizeAlignment(dels);
@@ -1070,34 +1076,37 @@ vector<int> Alignment::get_avg_diff(double & dist, double & avg_del, double & av
 	double del = 0;
 	double ins = 0;
 	double mis = 0;
-	double length = event_aln[event_aln.size() - 1].position - event_aln[0].position;
-	for (size_t i = 0; i < event_aln.size(); i++) {
-		if (i != 0) {
-			dist += event_aln[i].position - event_aln[i - 1].position;
-		}
+	if (event_aln.size() > 1) {
+		double length = event_aln[event_aln.size() - 1].position - event_aln[0].position;
+		for (size_t i = 0; i < event_aln.size(); i++) {
+			if (i != 0) {
+				dist += event_aln[i].position - event_aln[i - 1].position;
+			}
 
-		pair_str tmp;
-		tmp.position = -1;
-		if (event_aln[i].type == 0) {
-			tmp = plane->add_mut(event_aln[i].position, 1, min_tresh);
-		} else {
-			tmp = plane->add_mut(event_aln[i].position, abs(event_aln[i].type), min_tresh);
-		}
-		if (tmp.position != -1) { //check that its not the prev event!
-			mis_per_window.push_back(tmp.coverage); //store #mismatch per window each time it exceeds. (which might be every event position!)
-		}
-		if (event_aln[i].type > 0) {
-			avg_del += event_aln[i].type;
-		} else if (event_aln[i].type < 0) {
-			avg_ins += event_aln[i].type * -1;
+			pair_str tmp;
+			tmp.position = -1;
+			if (event_aln[i].type == 0) {
+				tmp = plane->add_mut(event_aln[i].position, 1, min_tresh);
+			} else {
+				tmp = plane->add_mut(event_aln[i].position, abs(event_aln[i].type), min_tresh);
+			}
+			if (tmp.position != -1) { //check that its not the prev event!
+				mis_per_window.push_back(tmp.coverage); //store #mismatch per window each time it exceeds. (which might be every event position!)
+			}
+			if (event_aln[i].type > 0) {
+				avg_del += event_aln[i].type;
+			} else if (event_aln[i].type < 0) {
+				avg_ins += event_aln[i].type * -1;
+			}
 		}
-	}
+		//cout << "len: " << length << endl;
+		avg_ins = avg_ins / length;
+		avg_del = avg_del / length;
 
-	avg_ins = avg_ins / length;
-	avg_del = avg_del / length;
-
-	dist = dist / (double) event_aln.size();
+		dist = dist / (double) event_aln.size();
+	}
 	plane->finalyze();
+
 	return mis_per_window;	//total_num /num;
 }
 
@@ -1108,12 +1117,12 @@ vector<str_event> Alignment::get_events_Aln() {
 //clock_t comp_aln = clock();
 	std::vector<indel_str> dels;
 	vector<differences_str> event_aln;
-	if (Parameter::Instance()->cs_string) {
-		cout<<"run cs check "<<std::endl;
-		event_aln = summarize_csstring(dels);
-	} else {
-		event_aln = summarizeAlignment(dels);
-	}
+	/*	if (Parameter::Instance()->cs_string) {
+	 cout<<"run cs check "<<std::endl;
+	 event_aln = summarize_csstring(dels);
+	 } else {*/
+	event_aln = summarizeAlignment(dels);
+//	}
 //double time2 = Parameter::Instance()->meassure_time(comp_aln, "\tcompAln Events: ");
 
 	vector<str_event> events;


=====================================
src/ArgParseOutput.h
=====================================
@@ -11,8 +11,7 @@ using std::cerr;
 using std::cout;
 using std::endl;
 
-class ArgParseOutput : public TCLAP::StdOutput
-{
+class ArgParseOutput: public TCLAP::StdOutput {
 private:
 
 	std::string usageStr;
@@ -36,6 +35,8 @@ public:
 		cerr << endl;
 		cerr << "Short usage:" << endl;
 		cerr << "       sniffles [options] -m <sorted.bam> -v <output.vcf> " << endl;
+		cerr << "Version: " << Parameter::Instance()->version << std::endl;
+		cerr << "Contact: fritz.sedlazeck at gmail.com" << std::endl;
 		cerr << endl;
 		cerr << "For complete USAGE and HELP type:" << endl;
 		cerr << "    sniffles --help" << endl;


=====================================
src/Genotyper/Genotyper.cpp
=====================================
@@ -13,25 +13,32 @@ std::string Genotyper::assess_genotype(int ref, int support) {
 	if (allele < Parameter::Instance()->min_allelel_frequency) {
 		return "";
 	}
+	if((support + ref)==0){
+		allele=0;
+	}
 
 	std::stringstream ss;
 	ss << ";AF=";
 	ss << allele;
 	ss << "\tGT:DR:DV\t";
-	if (allele > Parameter::Instance()->homfreq) {
-		ss << "1/1:";
+	if(ref==0 && support==0){
+		ss << "./."; //we cannot define it.
+	}else if (allele > Parameter::Instance()->homfreq) {
+		ss << "1/1";
 	} else if (allele > Parameter::Instance()->hetfreq) {
-		ss << "0/1:";
+		ss << "0/1";
 	} else {
-		ss << "0/0:";
+		ss << "0/0";
 	}
+	ss<<":";
 	ss << ref;
 	ss << ":";
 	ss << support;
 	return ss.str();
 }
 
-std::string Genotyper::mod_breakpoint_vcf(string buffer, int ref) {
+std::string Genotyper::mod_breakpoint_vcf(string buffer, std::pair<int, int> ref_strand) {
+	int ref=ref_strand.first+ref_strand.second;
 	//find last of\t
 	//parse #reads supporting
 	//print #ref
@@ -41,6 +48,12 @@ std::string Genotyper::mod_breakpoint_vcf(string buffer, int ref) {
 	pos = buffer.find_last_of("GT");
 	//tab
 	entry = buffer.substr(0, pos - 2);
+	std::stringstream ss;
+	ss << ";REF_strand=";
+	ss << ref_strand.first;
+	ss << ",";
+	ss << ref_strand.second;
+	entry +=ss.str();
 
 	buffer = buffer.substr(pos + 1);		// the right part is only needed:
 	pos = buffer.find_last_of(':');
@@ -54,8 +67,9 @@ std::string Genotyper::mod_breakpoint_vcf(string buffer, int ref) {
 
 }
 
-std::string Genotyper::mod_breakpoint_bedpe(string buffer, int ref) {
+std::string Genotyper::mod_breakpoint_bedpe(string buffer, std::pair<int,int> ref_strand) {
 
+	int ref=ref_strand.first+ref_strand.second;
 	std::string tmp = buffer;
 	std::string entry = tmp;
 	entry += '\t';
@@ -198,11 +212,24 @@ void Genotyper::update_file(Breakpoint_Tree & tree, breakpoint_node *& node) {
 				tmp = get_breakpoint_bedpe(buffer);
 			}
 
-			int ref = max(tree.get_ref(node, tmp.chr, tmp.pos), tree.get_ref(node, tmp.chr2, tmp.pos2));
+			std::pair<int,int> first_node=tree.get_ref(node, tmp.chr, tmp.pos);
+			std::pair<int,int> second_node=tree.get_ref(node, tmp.chr2, tmp.pos2);
+
+			std::pair<int,int> final_ref;
+			if(first_node.first+first_node.second>second_node.first+second_node.second){
+				final_ref=first_node;
+			}else{
+				final_ref=second_node;
+			}
+
+			if(final_ref.first==-1){
+				std::cerr<<"Error in GT: Tree node not found. Exiting."<<std::endl;
+				exit(0);
+			}
 			if (is_vcf) {
-				to_print = mod_breakpoint_vcf(buffer, ref);
+				to_print = mod_breakpoint_vcf(buffer,final_ref);
 			} else {
-				to_print = mod_breakpoint_bedpe(buffer, ref);
+				to_print = mod_breakpoint_bedpe(buffer,final_ref);
 			}
 			if (!to_print.empty()) {
 				fprintf(file, "%s", to_print.c_str());
@@ -266,7 +293,7 @@ std::vector<std::string> Genotyper::read_SVs(Breakpoint_Tree & tree, breakpoint_
 			tree.insert(node, tmp.chr, tmp.pos, true); //true: start;
 			tree.insert(node, tmp.chr2, tmp.pos2, false); //false: stop;//
 			num_sv++;
-			if (num_sv % 1000 == 0) {
+			if (num_sv % 5000 == 0) {
 				cout << "\t\tRead in SV: " << num_sv << endl;
 			}
 		} else if (buffer[2] == 'c' && buffer[3] == 'o') { //##contig=<ID=chr1,length=699930>
@@ -300,7 +327,11 @@ void Genotyper::compute_cov(Breakpoint_Tree & tree, breakpoint_node *& node, std
 			cout << "\t\tScanning CHR " << ref_dict[tmp.chr_id] << endl;
 			prev_id = tmp.chr_id;
 		}
-		tree.overalps(tmp.start, tmp.start + tmp.length, ref_dict[tmp.chr_id], node);
+		if (tmp.strand == 1) {		//strand of read
+			tree.overalps(tmp.start, tmp.start + tmp.length, ref_dict[tmp.chr_id], node, true);
+		} else {
+			tree.overalps(tmp.start, tmp.start + tmp.length, ref_dict[tmp.chr_id], node, false);
+		}
 		nbytes = fread(&tmp, sizeof(struct str_read), 1, ref_allel_reads);
 	}
 	fclose(ref_allel_reads);


=====================================
src/Genotyper/Genotyper.h
=====================================
@@ -25,8 +25,8 @@ private:
 	void update_file(Breakpoint_Tree & tree,breakpoint_node *& node);
 	variant_str get_breakpoint_vcf(string buffer);
 	variant_str get_breakpoint_bedpe(string buffer);
-	std::string mod_breakpoint_vcf(string buffer, int ref);
-	std::string mod_breakpoint_bedpe(string buffer, int ref);
+	std::string mod_breakpoint_vcf(string buffer, std::pair<int,int> ref);
+	std::string mod_breakpoint_bedpe(string buffer, std::pair<int,int> ref);
 	void parse_pos(char * buffer, int & pos, std::string & chr);
 
 


=====================================
src/Paramer.h
=====================================
@@ -25,7 +25,7 @@ class Parameter {
 private:
 	Parameter() {
 		window_thresh=10;//TODO check!
-		version="1.0.8";
+		version="1.0.10";
 		huge_ins = 999;//TODO check??
 	}
 	~Parameter() {
@@ -84,6 +84,7 @@ public:
 	bool change_coords; //indicating for --Ivcf
 	bool skip_parameter_estimation;
 	bool cs_string;
+	bool read_strand;
 
 	void set_regions(std::string reg) {
 		size_t i = 0;


=====================================
src/Parser.h
=====================================
@@ -14,6 +14,7 @@ struct str_read{
 	uint chr_id;
 	uint start;
 	ushort length;
+	ushort strand; //1=forward, 2=backward;
 };
 
 class Parser {


=====================================
src/Sniffles.cpp
=====================================
@@ -2,16 +2,16 @@
 // Name        : Sniffles.cpp
 // Author      : Fritz Sedlazeck
 // Version     :
-// Copyright   : Your copyright notice
-// Description : Hello World in C++, Ansi-style
+// Copyright   : MIT License
+// Description : Detection of SVs for long read data.
 //============================================================================
-// phil: cd ~/hetero/philipp/pacbio/example-svs/reads
 //For mac: cmake -D CMAKE_C_COMPILER=/opt/local/bin/gcc-mp-4.7 -D CMAKE_CXX_COMPILER=/opt/local/bin/g++-mp-4.7 ..
 #include <iostream>
 #include "Paramer.h"
 #include <tclap/CmdLine.h>
 #include <unistd.h>
 #include <omp.h>
+
 #include "Genotyper/Genotyper.h"
 #include "realign/Realign.h"
 #include "sub/Detect_Breakpoints.h"
@@ -25,7 +25,9 @@
 #include "ArgParseOutput.h"
 #include "force_calling/Force_calling.h"
 
-//cmake -D CMAKE_C_COMPILER=/opt/local/bin/gcc-mp-4.7 -D CMAKE_CXX_COMPILER=/opt/local/bin/g++-mp-4.7 ..
+//cmake -D CMAKE_C_COMPILER=/usr/local/bin/gcc-8 -D CMAKE_CXX_COMPILER=/usr/local/bin/g++-8 ..
+
+
 
 //TODO:
 //check strand headers.
@@ -86,12 +88,12 @@ void read_parameters(int argc, char *argv[]) {
 	TCLAP::SwitchArg arg_genotype("", "genotype", "Enables Sniffles to compute the genotypes.", cmd, false);
 	TCLAP::SwitchArg arg_cluster("", "cluster", "Enables Sniffles to phase SVs that occur on the same reads", cmd, false);
 	TCLAP::SwitchArg arg_std("", "ignore_sd", "Ignores the sd based filtering. ", cmd, false);
-	TCLAP::SwitchArg arg_bnd("", "report_BND", "Report BND instead of Tra in vcf output. ", cmd, false);
+	TCLAP::SwitchArg arg_bnd("", "report_BND", "Dont report BND instead use Tra in vcf output. ", cmd, true);
 	TCLAP::SwitchArg arg_seq("", "report_seq", "Report sequences for indels in vcf output. (Beta version!) ", cmd, false);
 	TCLAP::SwitchArg arg_coords("", "change_coords", "Adopt coordinates for force calling if finding evidence. ", cmd, false);
 	TCLAP::SwitchArg arg_parameter("", "skip_parameter_estimation", "Enables the scan if only very few reads are present. ", cmd, false);
 	TCLAP::SwitchArg arg_cs_string("", "cs_string", "Enables the scan of CS string instead of Cigar and MD. ", cmd, false);
-
+	TCLAP::SwitchArg arg_read_strand("", "report_read_strands", "Enables the report of the strand categories per read. (Beta) ", cmd, false);
 
 	TCLAP::ValueArg<float> arg_allelefreq("f", "allelefreq", "Threshold on allele frequency (0-1). ", false, 0.0, "float", cmd);
 	TCLAP::ValueArg<float> arg_hetfreq("", "min_het_af", "Threshold on allele frequency (0-1). ", false, 0.3, "float", cmd);
@@ -102,9 +104,11 @@ void read_parameters(int argc, char *argv[]) {
 	std::stringstream usage;
 	usage << "" << std::endl;
 	usage << "Usage: sniffles [options] -m <sorted.bam> -v <output.vcf> " << std::endl;
-
-	usage << "" << std::endl;
+	usage << "Version: "<<Parameter::Instance()->version << std::endl;
+	usage << "Contact: fritz.sedlazeck at gmail.com" << std::endl;
+	usage  << std::endl;
 	usage << "Input/Output:" << std::endl;
+
 	printParameter<std::string>(usage, arg_bamfile);
 	printParameter<std::string>(usage, arg_vcf);
 	printParameter<std::string>(usage, arg_bedpe);
@@ -138,6 +142,7 @@ void read_parameters(int argc, char *argv[]) {
 	printParameter(usage, arg_bnd);
 	printParameter(usage, arg_seq);
 	printParameter(usage, arg_std);
+	printParameter(usage,arg_read_strand);
 
 	usage << "" << std::endl;
 	usage << "Parameter estimation:" << std::endl;
@@ -174,7 +179,7 @@ void read_parameters(int argc, char *argv[]) {
 	Parameter::Instance()->change_coords = arg_coords.getValue();
 	Parameter::Instance()->debug = true;
 	Parameter::Instance()->score_treshold = 10;
-	Parameter::Instance()->read_name = " "; //21_16296949_+";//21_40181680_-";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
+	Parameter::Instance()->read_name = "0bac61ef-7819-462b-ae3d-32c68fe580c0"; //21_16296949_+";//21_40181680_-";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
 	Parameter::Instance()->bam_files.push_back(arg_bamfile.getValue());
 	Parameter::Instance()->min_mq = arg_mq.getValue();
 	Parameter::Instance()->output_vcf = arg_vcf.getValue();
@@ -200,6 +205,7 @@ void read_parameters(int argc, char *argv[]) {
 	Parameter::Instance()->hetfreq = arg_hetfreq.getValue();
 	Parameter::Instance()->skip_parameter_estimation = arg_parameter.getValue();
 	Parameter::Instance()->cs_string = arg_cs_string.getValue();
+	Parameter::Instance()->read_strand=arg_read_strand.getValue();
 
 	if (Parameter::Instance()->skip_parameter_estimation) {
 		cout<<"\tSkip parameter estimation."<<endl;


=====================================
src/force_calling/Force_calling.cpp
=====================================
@@ -39,29 +39,33 @@ void fill_tree(IntervallTree & final, TNode *& root_final, RefVector ref, std::m
 	//parse VCF file
 	std::vector<strvcfentry> entries = parse_vcf(Parameter::Instance()->input_vcf, 0);
 	std::cout << "\t\t" << entries.size() << " SVs found in input." << std::endl;
-	int invalid_svs=0;
+	int invalid_svs = 0;
 	for (size_t i = 0; i < entries.size(); i++) {
 		if (entries[i].type != -1) {
 			position_str svs;
-			//cout<<"start: "<<entries[i].start.chr << " stop "<<entries[i].stop.chr<<endl;
+
+			if (ref_lens.find(entries[i].start.chr) == ref_lens.end()) { // check why this is not called!
+				cerr << "Error undefined CHR in VCF vs. BAM header: " << entries[i].start.chr << endl;
+				exit(0);
+			}
+			if (ref_lens.find(entries[i].stop.chr) == ref_lens.end()) {
+				cerr << "Error undefined CHR in VCF vs. BAM header: " << entries[i].stop.chr << endl;
+				exit(0);
+			}
 			svs.start.min_pos = (long) entries[i].start.pos + ref_lens[entries[i].start.chr];
 			svs.stop.max_pos = (long) entries[i].stop.pos + ref_lens[entries[i].stop.chr];
 			read_str read;
-			if(ref_lens.find(entries[i].start.chr)==ref_lens.end()){
-				cerr<<"Warning undefined CHR in VCF vs. BAM header: "<<entries[i].start.chr<<endl;
-			}
-			if(ref_lens.find(entries[i].stop.chr)==ref_lens.end()){
-				cerr<<"Warning undefined CHR in VCF vs. BAM header: "<<entries[i].stop.chr<<endl;
-			}
+
 			read.coordinates.first = (long) entries[i].start.pos + ref_lens[entries[i].start.chr];
 			read.coordinates.second = (long) entries[i].stop.pos + ref_lens[entries[i].stop.chr];
 			if (entries[i].type == 4) { //ins?
-				if(entries[i].sv_len== Parameter::Instance()->huge_ins){
+				if (entries[i].sv_len == Parameter::Instance()->huge_ins) {
 					entries[i].sv_len++; // bad hack!
 				}
+
 				svs.stop.max_pos += (long) entries[i].sv_len;
-				read.coordinates.second+=(long) entries[i].sv_len;
-			//	cout << "Parse: " << entries[i].start.pos << " " << entries[i].stop.pos << " " << svs.start.min_pos  <<" "<<svs.stop.max_pos  << endl;
+				read.coordinates.second += (long) entries[i].sv_len;
+				//	cout << "Parse: " << entries[i].start.pos << " " << entries[i].stop.pos << " " << svs.start.min_pos  <<" "<<svs.stop.max_pos  << endl;
 			}
 
 			read.SV = assign_type(entries[i].type);
@@ -69,17 +73,18 @@ void fill_tree(IntervallTree & final, TNode *& root_final, RefVector ref, std::m
 			read.type = 2; //called
 			read.length = entries[i].sv_len; //svs.stop.max_pos-svs.start.min_pos;//try
 			svs.support["input"] = read;
+			//	cout<<"Submit: "<<entries[i].type <<endl;
 			Breakpoint * br = new Breakpoint(svs, (long) entries[i].sv_len, read.SV);
 			final.insert(br, root_final);
 		} else {
 			invalid_svs++;
-
 		}
 	}
-	cerr << "Invalid types found skipping " << invalid_svs    <<" entries." << endl;
+	cerr << "\tInvalid types found skipping " << invalid_svs << " entries." << endl;
 	//std::cout << "Print:" << std::endl;
 	//final.print(root_final);
 	entries.clear();
+	//exit(0);
 }
 
 void force_calling(std::string bam_file, IPrinter *& printer) {
@@ -106,7 +111,7 @@ void force_calling(std::string bam_file, IPrinter *& printer) {
 	fill_tree(final, root_final, ref, ref_lens);
 
 	int current_RefID = 0;
-	std::cout << "Start parsing: Chr "<<ref[current_RefID].RefName << std::endl;
+	std::cout << "Start parsing: Chr " << ref[current_RefID].RefName << std::endl;
 
 	//FILE * alt_allel_reads;
 	FILE * ref_allel_reads;
@@ -210,5 +215,5 @@ void force_calling(std::string bam_file, IPrinter *& printer) {
 		points[i]->predict_SV();
 		printer->printSV(points[i]); //redo! Ignore min support + STD etc.
 	}
-	std::cout << "Fin!" << std::endl;
+
 }


=====================================
src/force_calling/VCF_parser.cpp
=====================================
@@ -354,7 +354,7 @@ std::vector<strvcfentry> parse_vcf(std::string filename, int min_svs) {
 				if (count == 7 && strncmp(&buffer[i], ";STRANDS=", 9) == 0) {
 					set_strand = true;
 					tmp.strands.first = (bool) (buffer[i + 9] == '+');
-					tmp.strands.second = (bool) (buffer[i + 10] == '+');
+					tmp.strands.second = (bool)(buffer[i + 10] == '+');
 				}
 
 				if (count == 9 && buffer[i - 1] == '\t') { //parsing genotype;
@@ -431,7 +431,6 @@ std::vector<strvcfentry> parse_vcf(std::string filename, int min_svs) {
 			}
 			if ((strcmp(tmp.start.chr.c_str(), tmp.stop.chr.c_str()) != 0 || (tmp.sv_len >= min_svs))) { // || tmp.type==4
 
-
 				if (tmp.type == 5) { //BND
 					if (strcmp(tmp.stop.chr.c_str(), tmp.start.chr.c_str()) == 0) {
 						tmp.type = 2;


=====================================
src/print/BedpePrinter.cpp
=====================================
@@ -8,7 +8,7 @@
 #include "BedpePrinter.h"
 
 void BedpePrinter::print_header() {
-	fprintf(file, "%s", "#Chrom\tstart\tstop\tchrom2\tstart2\tstop2\tvariant_name/ID\tscore (smaller is better)\tstrand1\tstrand2\ttype\tnumber_of_split_reads\tbest_chr1\tbest_start\tbest_chr2\tbest_stop\tpredicted_length\tFILTER\n");
+	fprintf(file, "%s", "#Chrom\tstart\tstop\tchrom2\tstart2\tstop2\tvariant_name/ID\tscore (smaller is better)\tstrand1\tstrand2\ttype\tnumber_of_supporting_reads\tbest_chr1\tbest_start\tbest_chr2\tbest_stop\tpredicted_length\tFILTER\n");
 }
 void BedpePrinter::print_body(Breakpoint * &SV, RefVector ref) {
 	if (!this->bed_tree.is_in(SV->get_coordinates().start.most_support, this->root) && !this->bed_tree.is_in(SV->get_coordinates().stop.most_support, this->root)) {
@@ -103,8 +103,6 @@ void BedpePrinter::print_body(Breakpoint * &SV, RefVector ref) {
 				fprintf(file, "%s", "\tIMPRECISE");
 			}
 
-			//fprintf(file, "%c", '\t');
-			//fprintf(file, "%i", SV->get_support());
 			fprintf(file, "%c", '\n');
 		}
 	}


=====================================
src/print/VCFPrinter.cpp
=====================================
@@ -8,7 +8,7 @@
 #include "VCFPrinter.h"
 
 void VCFPrinter::print_header() {
-	fprintf(file, "%s", "##fileformat=VCFv4.2\n");
+	fprintf(file, "%s", "##fileformat=VCFv4.3\n");
 	fprintf(file, "%s", "##source=Sniffles\n");
 	string time = currentDateTime();
 	fprintf(file, "%s", "##fileDate=");
@@ -37,18 +37,28 @@ void VCFPrinter::print_header() {
 	fprintf(file, "%s", "##INFO=<ID=RE,Number=1,Type=Integer,Description=\"read support\">\n");
 	fprintf(file, "%s", "##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise structural variation\">\n");
 	fprintf(file, "%s", "##INFO=<ID=PRECISE,Number=0,Type=Flag,Description=\"Precise structural variation\">\n");
+	fprintf(file, "%s", "##INFO=<ID=UNRESOLVED,Number=0,Type=Flag,Description=\"An insertion that is longer than the read and thus we cannot predict the full size.\">\n");
 	fprintf(file, "%s", "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of the SV\">\n");
+	fprintf(file, "%s", "##INFO=<ID=REF_strand,Number=2,Type=Integer,Description=\"Length of the SV\">\n");
 	fprintf(file, "%s", "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">\n");
 	fprintf(file, "%s", "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n");
 	if (Parameter::Instance()->report_n_reads > 0 || Parameter::Instance()->report_n_reads == -1) {
-		fprintf(file, "%s", "##INFO=<ID=RNAMES,Number=1,Type=String,Description=\"Names of reads supporting SVs (comma seperated)\">\n");
+		fprintf(file, "%s", "##INFO=<ID=RNAMES,Number=1,Type=String,Description=\"Names of reads supporting SVs (comma separated)\">\n");
 	}
+	if (Parameter::Instance()->print_seq) {
+		fprintf(file, "%s", "##INFO=<ID=SEQ,Number=1,Type=String,Description=\"Extracted sequence from the best representative read.\">\n");
+	}
+
+	if (Parameter::Instance()->read_strand) {
+		fprintf(file, "%s", "##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description=\"alt reads first + ,alt reads first -,alt reads second + ,alt reads second -.\">\n");
+		fprintf(file, "%s", "##INFO=<ID=REF_strand,Number=2,Type=Integer,Description=\"plus strand ref, minus strand ref.\">\n");
+	}
+
 	fprintf(file, "%s", "##INFO=<ID=STD_quant_start,Number=A,Type=Integer,Description=\"STD of the start breakpoints across the reads.\">\n");
 	fprintf(file, "%s", "##INFO=<ID=STD_quant_stop,Number=A,Type=Integer,Description=\"STD of the stop breakpoints across the reads.\">\n");
-	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Integer,Description=\"Kurtosis value of the start breakpoints accross the reads.\">\n");
-	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Integer,Description=\"Kurtosis value of the stop breakpoints accross the reads.\">\n");
-	fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"Type by which the variant is supported.(SR,ALN)\">\n");
-	fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"Type by which the variant is supported.(SR,ALN)\">\n");
+	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Integer,Description=\"Kurtosis value of the start breakpoints across the reads.\">\n");
+	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Integer,Description=\"Kurtosis value of the stop breakpoints across the reads.\">\n");
+	fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"Type by which the variant is supported.(SR,ALN,NR)\">\n");
 	fprintf(file, "%s", "##INFO=<ID=STRANDS,Number=A,Type=String,Description=\"Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)\">\n");
 	fprintf(file, "%s", "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency.\">\n");
 	fprintf(file, "%s", "##INFO=<ID=ZMW,Number=A,Type=Integer,Description=\"Number of ZMWs (Pacbio) supporting SV.\">\n");
@@ -89,9 +99,9 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 			fprintf(file, "%i", id);
 			id++;
 
-			long end_coord=SV->get_coordinates().stop.most_support;
+			long end_coord = SV->get_coordinates().stop.most_support;
 			if (((SV->get_SVtype() & INS) && SV->get_length() == Parameter::Instance()->huge_ins) && SV->get_types().is_ALN) {
-				end_coord=std::max((SV->get_coordinates().stop.most_support - (long)SV->get_length()),(long) start);
+				end_coord = std::max((SV->get_coordinates().stop.most_support - (long) SV->get_length()), (long) start);
 			}
 
 			int end = IPrinter::calc_pos(end_coord, ref, chr);
@@ -123,11 +133,10 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 
 			if (((SV->get_SVtype() & INS) && SV->get_length() == Parameter::Instance()->huge_ins) && SV->get_types().is_ALN) {
 				fprintf(file, "%s", "\t.\tUNRESOLVED\t");
-			}else{
+			} else {
 				fprintf(file, "%s", "\t.\tPASS\t");
 			}
 
-
 			if (std_quant.first < 10 && std_quant.second < 10) {
 				fprintf(file, "%s", "PRECISE");
 			} else {
@@ -144,7 +153,7 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 				//if (SV->get_SVtype() & INS) {
 				//	fprintf(file, "%i", std::max((int) (end - SV->get_length()), start));
 				//} else {
-					fprintf(file, "%i", end);
+				fprintf(file, "%i", end);
 				//}
 			}
 			if (zmws != 0) {
@@ -175,15 +184,49 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 			fprintf(file, "%s", SV->get_supporting_types().c_str());
 			fprintf(file, "%s", ";SVLEN=");
 
-			if (((SV->get_SVtype() & INS) && SV->get_length() == Parameter::Instance()->huge_ins) && SV->get_types().is_ALN) {//!
+			if (((SV->get_SVtype() & INS) && SV->get_length() == Parameter::Instance()->huge_ins) && SV->get_types().is_ALN) {				//!
 				fprintf(file, "%i", 999999999);
-			} else {
+			}else if (SV->get_SVtype() & TRA){
+				fprintf(file, "%i",0);
+			}else if (SV->get_SVtype() & DEL){
+				fprintf(file, "%i", SV->get_length()*-1);
+			}else {
 				fprintf(file, "%i", SV->get_length());
 			}
 			//	}
 			fprintf(file, "%s", ";STRANDS=");
 			fprintf(file, "%s", strands.c_str());
-			if (!SV->get_sequence().empty()) {
+			if (Parameter::Instance()->read_strand) {
+				fprintf(file, "%s", ";STRANDS2=");
+				std::map<std::string, read_str> support = SV->get_coordinates().support;
+				pair<int, int> tmp_start;
+				pair<int, int> tmp_stop;
+				tmp_start.first = 0;
+				tmp_start.second = 0;
+				tmp_stop.first = 0;
+				tmp_stop.second = 0;
+				for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+					if ((*i).second.read_strand.first) {
+						tmp_start.first++;
+					} else {
+						tmp_start.second++;
+					}
+					if ((*i).second.read_strand.second) {
+						tmp_stop.first++;
+					} else {
+						tmp_stop.second++;
+					}
+				}
+				fprintf(file, "%i", tmp_start.first);
+				fprintf(file, "%s", ",");
+				fprintf(file, "%i", tmp_start.second);
+				fprintf(file, "%s", ",");
+				fprintf(file, "%i", tmp_stop.first);
+				fprintf(file, "%s", ",");
+				fprintf(file, "%i", tmp_stop.second);
+			}
+
+			if (Parameter::Instance()->print_seq && !SV->get_sequence().empty()) {
 				fprintf(file, "%s", ";SEQ=");
 				fprintf(file, "%s", SV->get_sequence().c_str());
 			}
@@ -204,6 +247,13 @@ void VCFPrinter::print_body_recall(Breakpoint * &SV, RefVector ref) {
 	if (Parameter::Instance()->phase) {
 		store_readnames(SV->get_read_ids(), id);
 	}
+
+	pair<double, double> kurtosis;
+	pair<double, double> std_quant;
+	double std_length = 0;
+	int zmws = 0;
+	bool ok_to_print = to_print(SV, std_quant, kurtosis, std_length, zmws);
+
 	std::string chr;
 	int start = IPrinter::calc_pos(SV->get_coordinates().start.most_support, ref, chr);
 	fprintf(file, "%s", chr.c_str());


=====================================
src/sub/Breakpoint.cpp
=====================================
@@ -358,9 +358,9 @@ void Breakpoint::predict_SV() {
 	std::vector<long> stops2;
 	std::vector<long> lengths2;
 
-	bool scan_reads=true;
-	if(positions.support.find("input") != positions.support.end() && !Parameter::Instance()->change_coords){
-		scan_reads=false;
+	bool scan_reads = true;
+	if (positions.support.find("input") != positions.support.end() && !Parameter::Instance()->change_coords) {
+		scan_reads = false;
 	}
 
 	for (std::map<std::string, read_str>::iterator i = positions.support.begin(); i != positions.support.end() && scan_reads; i++) {
@@ -441,7 +441,6 @@ void Breakpoint::predict_SV() {
 		this->indel_sequence = "";
 
 		//find the stop coordinate:
-
 		maxim = 0;
 		coord = 0;
 		mean = 0;
@@ -457,6 +456,7 @@ void Breakpoint::predict_SV() {
 		} else {
 			this->positions.stop.most_support = coord;
 		}
+
 		//find the length:
 		if (!(this->get_SVtype() & INS)) { //all types but Insertions:
 			this->length = this->positions.stop.most_support - this->positions.start.most_support;
@@ -479,10 +479,11 @@ void Breakpoint::predict_SV() {
 			} else {
 				this->length = coord;
 			}
-		//	cout << "third len: " << this->length << endl; // problem!
+			//	cout << "third len: " << this->length << endl; // problem!
 		}
 		starts.clear();
 		stops.clear();
+		//strand information:
 		for (size_t i = 0; i < strands.size(); i++) {
 			maxim = 0;
 			std::string id;
@@ -499,19 +500,40 @@ void Breakpoint::predict_SV() {
 			}
 		}
 		strands.clear();
+	}
 
-		std::map<std::string, read_str>::iterator tmp = positions.support.begin();
-		int start_prev_dist = 1000;
-		int stop_prev_dist = 1000;
-		while (tmp != positions.support.end()) {
-			int start_dist = abs((*tmp).second.coordinates.first - this->positions.start.most_support);
-			int stop_dist = abs((*tmp).second.coordinates.second - this->positions.stop.most_support);
-			if (((*tmp).second.SV & this->sv_type) && ((start_dist < start_prev_dist && stop_dist < stop_prev_dist) && (strcmp((*tmp).second.sequence.c_str(), "NA") != 0 && !(*tmp).second.sequence.empty()))) {
-				this->indel_sequence = (*tmp).second.sequence;
-			}
-			tmp++;
+	if (!scan_reads) {
+	//	std::cout<<"Test strand"<<std::endl;
+		//if forcecalling we need to define the strands:
+		std::map<std::string, read_str>::iterator it = positions.support.find("input");
+
+		//no output!??
+		/*if ((*it).second.strand.first) {
+			std::cout << "s1: true" << endl;
+		}
+		if ((*it).second.strand.second) {
+			std::cout << "s2: true" << endl;
+		}*/
+		if (it != positions.support.end()) {
+			this->strand.push_back(translate_strand((*it).second.strand));
 		}
 	}
+	std::map<std::string, read_str>::iterator tmp = positions.support.begin();
+	int start_prev_dist = 1000;
+	int stop_prev_dist = 1000;
+
+	while (tmp != positions.support.end()) {
+		int start_dist = abs((*tmp).second.coordinates.first - this->positions.start.most_support);
+		int stop_dist = abs((*tmp).second.coordinates.second - this->positions.stop.most_support);
+		if (((*tmp).second.SV & this->sv_type) && ((start_dist < start_prev_dist && stop_dist < stop_prev_dist) && (strcmp((*tmp).second.sequence.c_str(), "NA") != 0 && !(*tmp).second.sequence.empty()))) {
+			this->indel_sequence = (*tmp).second.sequence;
+
+			stop_prev_dist = stop_dist; //new to test!
+			start_prev_dist = start_dist;
+		}
+		tmp++;
+	}
+	//}
 	this->supporting_types = "";
 	if (positions.support.find("input") != positions.support.end()) {
 		if (num == 0) {


=====================================
src/sub/Detect_Breakpoints.cpp
=====================================
@@ -203,14 +203,14 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 	long num_reads = 0;
 
 	/*Genotyper * go;
-	if (Parameter::Instance()->genotype) {
-		go = new Genotyper();
-	}*/
-	std::cout << "Start parsing... "<<ref[tmp_aln->getRefID()].RefName  << std::endl;
+	 if (Parameter::Instance()->genotype) {
+	 go = new Genotyper();
+	 }*/
+	std::cout << "Start parsing... " << ref[tmp_aln->getRefID()].RefName << std::endl;
 
 	while (!tmp_aln->getQueryBases().empty()) {
 
-		if ((tmp_aln->getAlignment()->IsPrimaryAlignment()) && (!(tmp_aln->getAlignment()->AlignmentFlag & 0x800) && tmp_aln->get_is_save())){// && (Parameter::Instance()->chr_names.empty() || Parameter::Instance()->chr_names.find(ref[tmp_aln->getRefID()].RefName) != Parameter::Instance()->chr_names.end())) {
+		if ((tmp_aln->getAlignment()->IsPrimaryAlignment()) && (!(tmp_aln->getAlignment()->AlignmentFlag & 0x800) && tmp_aln->get_is_save())) {	// && (Parameter::Instance()->chr_names.empty() || Parameter::Instance()->chr_names.find(ref[tmp_aln->getRefID()].RefName) != Parameter::Instance()->chr_names.end())) {
 
 			//change CHR:
 			if (current_RefID != tmp_aln->getRefID()) {
@@ -220,13 +220,13 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 				bst.get_breakpoints(root, points);
 				//polish_points(points, ref);
 
-			/*	if (Parameter::Instance()->genotype) {
-					fclose(ref_allel_reads);
-					cout<<"\t\tGenotyping"<<endl;
-					go->update_SVs(points, ref_space);
-					cout<<"\t\tGenotyping finished"<<endl;
-					ref_allel_reads = fopen(Parameter::Instance()->tmp_genotyp.c_str(), "wb");
-				}*/
+				/*	if (Parameter::Instance()->genotype) {
+				 fclose(ref_allel_reads);
+				 cout<<"\t\tGenotyping"<<endl;
+				 go->update_SVs(points, ref_space);
+				 cout<<"\t\tGenotyping finished"<<endl;
+				 ref_allel_reads = fopen(Parameter::Instance()->tmp_genotyp.c_str(), "wb");
+				 }*/
 
 				for (int i = 0; i < points.size(); i++) {
 					points[i]->calc_support();
@@ -283,6 +283,11 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 					tmp.chr_id = tmp_aln->getRefID();	//check string in binary???
 					tmp.start = tmp_aln->getPosition();
 					tmp.length = tmp_aln->getRefLength();
+					if (tmp_aln->getStrand()) {
+						tmp.strand = 1;
+					} else {
+						tmp.strand = 2;
+					}
 					fwrite(&tmp, sizeof(struct str_read), 1, ref_allel_reads);
 				}
 
@@ -310,13 +315,13 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
 	bst.get_breakpoints(root, points);
 
 	/*	if (Parameter::Instance()->genotype) {
-		fclose(ref_allel_reads);
-		go->update_SVs(points, ref_space);
-		string del = "rm ";
-		del += Parameter::Instance()->tmp_genotyp;
-		del += "ref_allele";
-		system(del.c_str());
-	}*/
+	 fclose(ref_allel_reads);
+	 go->update_SVs(points, ref_space);
+	 string del = "rm ";
+	 del += Parameter::Instance()->tmp_genotyp;
+	 del += "ref_allele";
+	 system(del.c_str());
+	 }*/
 
 	for (int i = 0; i < points.size(); i++) {
 		points[i]->calc_support();
@@ -361,79 +366,81 @@ void add_events(Alignment *& tmp, std::vector<str_event> events, short type, lon
 
 	bool flag = (strcmp(tmp->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
 	for (size_t i = 0; i < events.size(); i++) {
-		position_str svs;
-		read_str read;
-		if (events[i].is_noise) {
-			read.type = 2;
-		} else {
-			read.type = 0;
-		}
-		read.SV = events[i].type;
-		read.sequence = events[i].sequence;
+	//	if (events[i - 1].length > Parameter::Instance()->min_segment_size || events[i].length > Parameter::Instance()->min_segment_size) {
+			position_str svs;
+			read_str read;
+			if (events[i].is_noise) {
+				read.type = 2;
+			} else {
+				read.type = 0;
+			}
+			read.SV = events[i].type;
+			read.sequence = events[i].sequence;
 
-		if (flag) {
-			std::cout << "ADD EVENT " << tmp->getName() << " " << tmp->getRefID() << " " << events[i].pos << " " << abs(events[i].length) << std::endl;
-		}
-		svs.start.min_pos = (long) events[i].pos + ref_space;
-		svs.stop.max_pos = svs.start.min_pos + events[i].length;
+			if (flag) {
+				std::cout << "ADD EVENT " << tmp->getName() << " " << tmp->getRefID() << " " << events[i].pos << " " << abs(events[i].length) << std::endl;
+			}
+			svs.start.min_pos = (long) events[i].pos + ref_space;
+			svs.stop.max_pos = svs.start.min_pos + events[i].length;
 
-		if (tmp->getStrand()) {
-			read.strand.first = (tmp->getStrand());
-			read.strand.second = !(tmp->getStrand());
-		} else {
-			read.strand.first = !(tmp->getStrand());
-			read.strand.second = (tmp->getStrand());
-		}
-		//	start.support[0].read_start.min = events[i].read_pos;
+			if (tmp->getStrand()) {
+				read.strand.first = (tmp->getStrand());
+				read.strand.second = !(tmp->getStrand());
+			} else {
+				read.strand.first = !(tmp->getStrand());
+				read.strand.second = (tmp->getStrand());
+			}
+			//	start.support[0].read_start.min = events[i].read_pos;
 
-		read.read_strand.first = tmp->getStrand();
-		read.read_strand.second = tmp->getStrand();
-		if (flag) {
-			std::cout << tmp->getName() << " " << tmp->getRefID() << " " << svs.start.min_pos << " " << svs.stop.max_pos << " " << svs.stop.max_pos - svs.start.min_pos << std::endl;
-		}
+			read.read_strand.first = tmp->getStrand();
+			read.read_strand.second = tmp->getStrand();
+			if (flag) {
+				std::cout << tmp->getName() << " " << tmp->getRefID() << " " << svs.start.min_pos << " " << svs.stop.max_pos << " " << svs.stop.max_pos - svs.start.min_pos << std::endl;
+			}
 
-		if (svs.start.min_pos > svs.stop.max_pos) {
-			//can this actually happen?
-			read.coordinates.first = svs.stop.max_pos;
-			read.coordinates.second = svs.start.min_pos;
-		} else {
-			read.coordinates.first = svs.start.min_pos;
-			read.coordinates.second = svs.stop.max_pos;
-		}
+			if (svs.start.min_pos > svs.stop.max_pos) {
+				//can this actually happen?
+				read.coordinates.first = svs.stop.max_pos;
+				read.coordinates.second = svs.start.min_pos;
+			} else {
+				read.coordinates.first = svs.start.min_pos;
+				read.coordinates.second = svs.stop.max_pos;
+			}
 
-		svs.start.max_pos = svs.start.min_pos;
-		svs.stop.min_pos = svs.stop.max_pos;
+			svs.start.max_pos = svs.start.min_pos;
+			svs.stop.min_pos = svs.stop.max_pos;
 
-		if (svs.start.min_pos > svs.stop.max_pos) { //incase they are inverted
-			svs_breakpoint_str pos = svs.start;
-			svs.start = svs.stop;
-			svs.stop = pos;
-			pair<bool, bool> tmp = read.strand;
-			read.strand.first = tmp.second;
-			read.strand.second = tmp.first;
-		}
+			if (svs.start.min_pos > svs.stop.max_pos) { //incase they are inverted
+				svs_breakpoint_str pos = svs.start;
+				svs.start = svs.stop;
+				svs.stop = pos;
+				pair<bool, bool> tmp = read.strand;
+				read.strand.first = tmp.second;
+				read.strand.second = tmp.first;
+			}
 
-		//TODO: we might not need this:
-		if (svs.start.min_pos > svs.stop.max_pos) {
-			read.coordinates.first = svs.stop.max_pos;
-			read.coordinates.second = svs.start.min_pos;
-		} else {
-			read.coordinates.first = svs.start.min_pos;
-			read.coordinates.second = svs.stop.max_pos;
-		}
+			//TODO: we might not need this:
+			if (svs.start.min_pos > svs.stop.max_pos) {
+				read.coordinates.first = svs.stop.max_pos;
+				read.coordinates.second = svs.start.min_pos;
+			} else {
+				read.coordinates.first = svs.start.min_pos;
+				read.coordinates.second = svs.stop.max_pos;
+			}
 
-		read.id = read_id;
-		svs.support[tmp->getName()] = read;
-		svs.support[tmp->getName()].length = events[i].length;
-		Breakpoint * point = new Breakpoint(svs, events[i].length);
-		if (add) {
-			bst.insert_existant(point, root);
-		} else {
-			bst.insert(point, root);
+			read.id = read_id;
+			svs.support[tmp->getName()] = read;
+			svs.support[tmp->getName()].length = events[i].length;
+			Breakpoint * point = new Breakpoint(svs, events[i].length);
+			if (add) {
+				bst.insert_existant(point, root);
+			} else {
+				bst.insert(point, root);
+			}
+			//std::cout<<"Print:"<<std::endl;
+			//bst.print(root);
 		}
-		//std::cout<<"Print:"<<std::endl;
-		//bst.print(root);
-	}
+//	}
 }
 
 void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVector ref, IntervallTree& bst, TNode *&root, long read_id, bool add) {
@@ -452,212 +459,214 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
 	}
 
 	for (size_t i = 1; i < events.size(); i++) {
-		position_str svs;
-		//position_str stop;
-		read_str read;
-		read.sequence = "NA";
-		//read.name = tmp->getName();
-		read.type = type;
-		read.SV = 0;
-		read.read_strand.first = events[i - 1].strand;
-		read.read_strand.second = events[i].strand;
-
-		//stop.support.push_back(read);
-		if (events[i].RefID == events[i - 1].RefID) { //IF different chr -> tra
-			if (events[i - 1].strand == events[i].strand) { //IF same strand -> del/ins/dup
-				if (events[i - 1].strand) {
-					read.strand.first = events[i - 1].strand;
-					read.strand.second = !events[i].strand;
-				} else {
-					read.strand.first = !events[i - 1].strand;
-					read.strand.second = events[i].strand;
-				}
-				//	int len1 = 0;
-				//int len2 = 0;
-				svs.read_start = events[i - 1].read_pos_stop; // (short) events[i - 1].read_pos_start + (short) events[i - 1].length;
-				svs.read_stop = events[i].read_pos_start;
-				if (events[i - 1].strand) {
-					svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
-					svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
-				} else {
-					svs.start.min_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
-					svs.stop.max_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
-				}
+	//	if (events[i - 1].length > Parameter::Instance()->min_segment_size || events[i].length > Parameter::Instance()->min_segment_size) {
+			position_str svs;
+			//position_str stop;
+			read_str read;
+			read.sequence = "NA";
+			//read.name = tmp->getName();
+			read.type = type;
+			read.SV = 0;
+			read.read_strand.first = events[i - 1].strand;
+			read.read_strand.second = events[i].strand;
+
+			//stop.support.push_back(read);
+			if (events[i].RefID == events[i - 1].RefID) { //IF different chr -> tra
+				if (events[i - 1].strand == events[i].strand) { //IF same strand -> del/ins/dup
+					if (events[i - 1].strand) {
+						read.strand.first = events[i - 1].strand;
+						read.strand.second = !events[i].strand;
+					} else {
+						read.strand.first = !events[i - 1].strand;
+						read.strand.second = events[i].strand;
+					}
+					//	int len1 = 0;
+					//int len2 = 0;
+					svs.read_start = events[i - 1].read_pos_stop; // (short) events[i - 1].read_pos_start + (short) events[i - 1].length;
+					svs.read_stop = events[i].read_pos_start;
+					if (events[i - 1].strand) {
+						svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
+						svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
+					} else {
+						svs.start.min_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
+						svs.stop.max_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
+					}
 
-				if (flag) {
-					cout << "Debug: SV_Size: " << (svs.start.min_pos - svs.stop.max_pos) << " tmp: " << (svs.stop.max_pos - svs.start.min_pos) << " Ref_start: " << svs.start.min_pos - get_ref_lengths(events[i].RefID, ref) << " Ref_stop: " << svs.stop.max_pos - get_ref_lengths(events[i].RefID, ref) << " readstart: " << svs.read_start << " readstop: " << svs.read_stop << std::endl;
-				}
+					if (flag) {
+						cout << "Debug: SV_Size: " << (svs.start.min_pos - svs.stop.max_pos) << " tmp: " << (svs.stop.max_pos - svs.start.min_pos) << " Ref_start: " << svs.start.min_pos - get_ref_lengths(events[i].RefID, ref) << " Ref_stop: " << svs.stop.max_pos - get_ref_lengths(events[i].RefID, ref) << " readstart: " << svs.read_start << " readstop: " << svs.read_stop << std::endl;
+					}
 
-				if ((svs.stop.max_pos - svs.start.min_pos) > Parameter::Instance()->min_length * -1 && ((svs.stop.max_pos - svs.start.min_pos) + (Parameter::Instance()->min_length) < (svs.read_stop - svs.read_start) && (svs.read_stop - svs.read_start) > (Parameter::Instance()->min_length * 2))) {
-					if (!events[i].cross_N || (double) ((svs.stop.max_pos - svs.start.min_pos) + Parameter::Instance()->min_length) < ((double) (svs.read_stop - svs.read_start) * Parameter::Instance()->avg_ins)) {
-						svs.stop.max_pos += (svs.read_stop - svs.read_start); //TODO check!
-						if (Parameter::Instance()->print_seq) {
-							svs.read_stop = events[i].read_pos_start;
-							svs.read_start = events[i - 1].read_pos_stop;
-							if (svs.read_stop > tmp->getAlignment()->QueryBases.size()) {
-								cerr << "BUG: split read ins! " << svs.read_stop << " " << tmp->getAlignment()->QueryBases.size() << " " << tmp->getName() << endl;
+					if ((svs.stop.max_pos - svs.start.min_pos) > Parameter::Instance()->min_length * -1 && ((svs.stop.max_pos - svs.start.min_pos) + (Parameter::Instance()->min_length) < (svs.read_stop - svs.read_start) && (svs.read_stop - svs.read_start) > (Parameter::Instance()->min_length * 2))) {
+						if (!events[i].cross_N || (double) ((svs.stop.max_pos - svs.start.min_pos) + Parameter::Instance()->min_length) < ((double) (svs.read_stop - svs.read_start) * Parameter::Instance()->avg_ins)) {
+							svs.stop.max_pos += (svs.read_stop - svs.read_start); //TODO check!
+							if (Parameter::Instance()->print_seq) {
+								svs.read_stop = events[i].read_pos_start;
+								svs.read_start = events[i - 1].read_pos_stop;
+								if (svs.read_stop > tmp->getAlignment()->QueryBases.size()) {
+									cerr << "BUG: split read ins! " << svs.read_stop << " " << tmp->getAlignment()->QueryBases.size() << " " << tmp->getName() << endl;
+								}
+								if (!events[i - 1].strand) {
+									std::string tmp_seq = reverse_complement(tmp->getAlignment()->QueryBases);
+
+									read.sequence = reverse_complement(tmp_seq.substr(svs.read_start, svs.read_stop - svs.read_start));
+								} else {
+									read.sequence = tmp->getAlignment()->QueryBases.substr(svs.read_start, svs.read_stop - svs.read_start);
+								}
+								if (flag) {
+									cout << "INS: " << endl;
+									cout << "split read ins! " << events[i - 1].read_pos_stop << " " << events[i].read_pos_start << " " << " " << tmp->getAlignment()->QueryBases.size() << " " << tmp->getName() << endl;
+									cout << "Seq+:" << read.sequence << endl;
+								}
 							}
-							if (!events[i - 1].strand) {
-								std::string tmp_seq = reverse_complement(tmp->getAlignment()->QueryBases);
+							read.SV |= INS;
+						} else {
+							read.SV |= 'n';
+						}
 
-								read.sequence = reverse_complement(tmp_seq.substr(svs.read_start, svs.read_stop - svs.read_start));
-							} else {
-								read.sequence = tmp->getAlignment()->QueryBases.substr(svs.read_start, svs.read_stop - svs.read_start);
-							}
+					} else if ((svs.start.min_pos - svs.stop.max_pos) * -1 > (svs.read_stop - svs.read_start) + (Parameter::Instance()->min_length)) {
+						if (!events[i].cross_N || (double) (svs.start.min_pos - svs.stop.max_pos) * Parameter::Instance()->avg_del * -1.0 > (double) ((svs.read_stop - svs.read_start) + (Parameter::Instance()->min_length))) {
+							read.SV |= DEL;
 							if (flag) {
-								cout << "INS: " << endl;
-								cout << "split read ins! " << events[i - 1].read_pos_stop << " " << events[i].read_pos_start << " " << " " << tmp->getAlignment()->QueryBases.size() << " " << tmp->getName() << endl;
-								cout << "Seq+:" << read.sequence << endl;
+								cout << "DEL2" << endl;
 							}
+						} else {
+							read.SV |= 'n';
 						}
-						read.SV |= INS;
-					} else {
-						read.SV |= 'n';
-					}
 
-				} else if ((svs.start.min_pos - svs.stop.max_pos) * -1 > (svs.read_stop - svs.read_start) + (Parameter::Instance()->min_length)) {
-					if (!events[i].cross_N || (double) (svs.start.min_pos - svs.stop.max_pos) * Parameter::Instance()->avg_del * -1.0 > (double) ((svs.read_stop - svs.read_start) + (Parameter::Instance()->min_length))) {
-						read.SV |= DEL;
+					} else if ((svs.start.min_pos - svs.stop.max_pos) > Parameter::Instance()->min_length && (svs.read_start - svs.read_stop) < Parameter::Instance()->min_length) { //check with respect to the coords of reads!
 						if (flag) {
-							cout << "DEL2" << endl;
+							cout << "DUP: " << endl;
 						}
+						read.SV |= DUP;
 					} else {
-						read.SV |= 'n';
+						if (flag) {
+							cout << "N" << endl;
+						}
+						read.SV = 'n';
 					}
+				} else { // if first part of read is in a different direction as the second part-> INV
 
-				} else if ((svs.start.min_pos - svs.stop.max_pos) > Parameter::Instance()->min_length && (svs.read_start - svs.read_stop) < Parameter::Instance()->min_length) { //check with respect to the coords of reads!
-					if (flag) {
-						cout << "DUP: " << endl;
-					}
-					read.SV |= DUP;
-				} else {
-					if (flag) {
-						cout << "N" << endl;
+					read.strand.first = events[i - 1].strand;
+					read.strand.second = !events[i].strand;
+
+					bool is_overlapping = overlaps(events[i - 1], events[i]);
+					if (is_overlapping && (events[i - 1].length > Parameter::Instance()->min_segment_size || events[i].length > Parameter::Instance()->min_segment_size)) {
+						if (flag) {
+							std::cout << "Overlap curr: " << events[i].pos << " " << events[i].pos + events[i].length << " prev: " << events[i - 1].pos << " " << events[i - 1].pos + events[i - 1].length << " " << tmp->getName() << std::endl;
+						}
+						read.SV |= NEST;
+
+						if (events[i - 1].strand) {
+							svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
+							svs.stop.max_pos = (events[i].pos + events[i].length) + get_ref_lengths(events[i].RefID, ref);
+						} else {
+							svs.start.min_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
+							svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
+						}
+
+						if (svs.start.min_pos > svs.stop.max_pos) {
+							long tmp = svs.start.min_pos;
+							svs.start.min_pos = svs.stop.max_pos;
+							svs.stop.max_pos = tmp;
+						}
+					} else if (!is_overlapping) {
+						read.SV |= INV;
+						if (events[i - 1].strand) {
+							svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
+							svs.stop.max_pos = (events[i].pos + events[i].length) + get_ref_lengths(events[i].RefID, ref);
+						} else {
+							svs.start.min_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
+							svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
+						}
 					}
-					read.SV = 'n';
 				}
-			} else { // if first part of read is in a different direction as the second part-> INV
 
+			} else { //if not on the same chr-> TRA
 				read.strand.first = events[i - 1].strand;
 				read.strand.second = !events[i].strand;
-
-				bool is_overlapping = overlaps(events[i - 1], events[i]);
-				if (is_overlapping && (events[i - 1].length > Parameter::Instance()->min_segment_size || events[i].length > Parameter::Instance()->min_segment_size)) {
-					if (flag) {
-						std::cout << "Overlap curr: " << events[i].pos << " " << events[i].pos + events[i].length << " prev: " << events[i - 1].pos << " " << events[i - 1].pos + events[i - 1].length << " " << tmp->getName() << std::endl;
-					}
-					read.SV |= NEST;
+				if (events[i - 1].strand == events[i].strand) {
+					//check this with + - strands!!
 
 					if (events[i - 1].strand) {
 						svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
-						svs.stop.max_pos = (events[i].pos + events[i].length) + get_ref_lengths(events[i].RefID, ref);
+						svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
 					} else {
 						svs.start.min_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
-						svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
-					}
-
-					if (svs.start.min_pos > svs.stop.max_pos) {
-						long tmp = svs.start.min_pos;
-						svs.start.min_pos = svs.stop.max_pos;
-						svs.stop.max_pos = tmp;
+						svs.stop.max_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
 					}
-				} else if (!is_overlapping) {
-					read.SV |= INV;
+				} else {
 					if (events[i - 1].strand) {
 						svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
-						svs.stop.max_pos = (events[i].pos + events[i].length) + get_ref_lengths(events[i].RefID, ref);
+						svs.stop.max_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
 					} else {
 						svs.start.min_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
 						svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
 					}
 				}
+				read.SV |= TRA;
 			}
 
-		} else { //if not on the same chr-> TRA
-			read.strand.first = events[i - 1].strand;
-			read.strand.second = !events[i].strand;
-			if (events[i - 1].strand == events[i].strand) {
-				//check this with + - strands!!
-
-				if (events[i - 1].strand) {
-					svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
-					svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
-				} else {
-					svs.start.min_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
-					svs.stop.max_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
+			if (read.SV != 'n') {
+				if (flag) {
+					std::cout << "SPLIT: " << TRANS_type(read.SV) << " start: " << svs.start.min_pos - get_ref_lengths(events[i].RefID, ref) << " stop: " << svs.stop.max_pos - get_ref_lengths(events[i].RefID, ref);
+					if (events[i - 1].strand) {
+						std::cout << " +";
+					} else {
+						std::cout << " -";
+					}
+					if (events[i].strand) {
+						std::cout << " +";
+					} else {
+						std::cout << " -";
+					}
+					std::cout << " " << tmp->getName() << std::endl;
+					std::cout << "READ: " << svs.read_start << " " << svs.read_stop << " " << svs.read_start - svs.read_stop << std::endl;
 				}
-			} else {
-				if (events[i - 1].strand) {
-					svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
-					svs.stop.max_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
-				} else {
-					svs.start.min_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
-					svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
+				//std::cout<<"split"<<std::endl;
+
+				svs.start.max_pos = svs.start.min_pos;
+				svs.stop.min_pos = svs.stop.max_pos;
+				if (svs.start.min_pos > svs.stop.max_pos) {
+					//maybe we have to invert the directions???
+					svs_breakpoint_str pos = svs.start;
+					svs.start = svs.stop;
+					svs.stop = pos;
+
+					pair<bool, bool> tmp = read.strand;
+
+					read.strand.first = tmp.second;
+					read.strand.second = tmp.first;
 				}
-			}
-			read.SV |= TRA;
-		}
 
-		if (read.SV != 'n') {
-			if (flag) {
-				std::cout << "SPLIT: " << TRANS_type(read.SV) << " start: " << svs.start.min_pos - get_ref_lengths(events[i].RefID, ref) << " stop: " << svs.stop.max_pos - get_ref_lengths(events[i].RefID, ref);
-				if (events[i - 1].strand) {
-					std::cout << " +";
+				//TODO: we might not need this:
+				if (svs.start.min_pos > svs.stop.max_pos) {
+					read.coordinates.first = svs.stop.max_pos;
+					read.coordinates.second = svs.start.min_pos;
 				} else {
-					std::cout << " -";
+					read.coordinates.first = svs.start.min_pos;
+					read.coordinates.second = svs.stop.max_pos;
 				}
-				if (events[i].strand) {
-					std::cout << " +";
+
+				//pool out?
+				read.id = read_id;
+				svs.support[tmp->getName()] = read;
+				svs.support[tmp->getName()].length = abs(read.coordinates.second - read.coordinates.first);
+				Breakpoint * point = new Breakpoint(svs, abs(read.coordinates.second - read.coordinates.first));
+				//std::cout<<"split ADD: " << <<" Name: "<<tmp->getName()<<" "<< svs.start.min_pos- get_ref_lengths(events[i].RefID, ref)<<"-"<<svs.stop.max_pos- get_ref_lengths(events[i].RefID, ref)<<std::endl;
+				if (add) {
+					bst.insert_existant(point, root);
 				} else {
-					std::cout << " -";
+					bst.insert(point, root);
 				}
-				std::cout << " " << tmp->getName() << std::endl;
-				std::cout << "READ: " << svs.read_start << " " << svs.read_stop << " " << svs.read_start - svs.read_stop << std::endl;
-			}
-			//std::cout<<"split"<<std::endl;
-
-			svs.start.max_pos = svs.start.min_pos;
-			svs.stop.min_pos = svs.stop.max_pos;
-			if (svs.start.min_pos > svs.stop.max_pos) {
-				//maybe we have to invert the directions???
-				svs_breakpoint_str pos = svs.start;
-				svs.start = svs.stop;
-				svs.stop = pos;
-
-				pair<bool, bool> tmp = read.strand;
-
-				read.strand.first = tmp.second;
-				read.strand.second = tmp.first;
-			}
-
-			//TODO: we might not need this:
-			if (svs.start.min_pos > svs.stop.max_pos) {
-				read.coordinates.first = svs.stop.max_pos;
-				read.coordinates.second = svs.start.min_pos;
-			} else {
-				read.coordinates.first = svs.start.min_pos;
-				read.coordinates.second = svs.stop.max_pos;
+				//	std::cout<<"Print:"<<std::endl;
+				//	bst.print(root);
 			}
-
-			//pool out?
-			read.id = read_id;
-			svs.support[tmp->getName()] = read;
-			svs.support[tmp->getName()].length = abs(read.coordinates.second - read.coordinates.first);
-			Breakpoint * point = new Breakpoint(svs, abs(read.coordinates.second - read.coordinates.first));
-			//std::cout<<"split ADD: " << <<" Name: "<<tmp->getName()<<" "<< svs.start.min_pos- get_ref_lengths(events[i].RefID, ref)<<"-"<<svs.stop.max_pos- get_ref_lengths(events[i].RefID, ref)<<std::endl;
-			if (add) {
-				bst.insert_existant(point, root);
-			} else {
-				bst.insert(point, root);
-			}
-			//	std::cout<<"Print:"<<std::endl;
-			//	bst.print(root);
 		}
-	}
+	//}
 }
 
 void estimate_parameters(std::string read_filename) {
-	if(Parameter::Instance()->skip_parameter_estimation){
+	if (Parameter::Instance()->skip_parameter_estimation) {
 		return;
 	}
 	cout << "Estimating parameter..." << endl;
@@ -692,9 +701,10 @@ void estimate_parameters(std::string read_filename) {
 			double avg_del = 0;
 			double avg_ins = 0;
 			vector<int> tmp = tmp_aln->get_avg_diff(dist, avg_del, avg_ins);
+		//	std::cout<<"Debug:\t"<<avg_del<<" "<<avg_ins<<endl;
 			tot_avg_ins += avg_ins;
 			tot_avg_del += avg_del;
-			//std::cout<<"Debug:\t"<<dist<<"\t";
+			//
 			avg_dist += dist;
 			double avg_mis = 0;
 			for (size_t i = 0; i < tmp.size(); i++) {
@@ -749,6 +759,8 @@ void estimate_parameters(std::string read_filename) {
 	pos = nums.size() * 0.05; //the lowest 5% cuttoff
 	Parameter::Instance()->score_treshold = 2; //nums[pos]; //prev=2
 
+	//cout<<"test: "<<tot_avg_ins<<" "<<num<<endl;
+	//cout<<"test2: "<<tot_avg_del<<" "<<num<<endl;
 	Parameter::Instance()->avg_del = tot_avg_del / num;
 	Parameter::Instance()->avg_ins = tot_avg_ins / num;
 


=====================================
src/tree/Breakpoint_Tree.cpp
=====================================
@@ -25,28 +25,36 @@ void Breakpoint_Tree::find(int position, std::string chr, breakpoint_node *par,
 	}
 }
 
-void Breakpoint_Tree::overalps(int start, int stop, std::string chr, breakpoint_node *par) {
+void Breakpoint_Tree::overalps(int start, int stop, std::string chr, breakpoint_node *par, bool ref_strand) {
 	//start + stop: read coordinates.
 	if (par == NULL) { //not found
 		return;
 	}
 	if (par->direction) { //start
-		if ((par->position-100 > start && par->position+100 < stop) && strcmp(chr.c_str(), par->chr.c_str()) == 0) { //found
-			par->ref_support++;
-	//		std::cout<<"start: "<<start<<" "<<stop<<std::endl;
+		if ((par->position - 100 > start && par->position + 100 < stop) && strcmp(chr.c_str(), par->chr.c_str()) == 0) { //found
+			if (ref_strand) {
+				par->ref_support.first++;
+			} else {
+				par->ref_support.second++;
+			}
+			//		std::cout<<"start: "<<start<<" "<<stop<<std::endl;
 		}
 	} else { //stop coordinate
-		if ((par->position > start+100 && par->position < stop-100) && strcmp(chr.c_str(), par->chr.c_str()) == 0) { //found
-			par->ref_support++;
-	//		std::cout<<"stop: "<< start<<" "<<stop<<std::endl;
+		if ((par->position > start + 100 && par->position < stop - 100) && strcmp(chr.c_str(), par->chr.c_str()) == 0) { //found
+			if (ref_strand) {
+				par->ref_support.first++;
+			} else {
+				par->ref_support.second++;
+			}
+			//		std::cout<<"stop: "<< start<<" "<<stop<<std::endl;
 		}
 	}
 
 	//search goes on:
 	if (start < par->position) {
-		overalps(start, stop, chr, par->left);
+		overalps(start, stop, chr, par->left, ref_strand);
 	} else {
-		overalps(start, stop, chr, par->right);
+		overalps(start, stop, chr, par->right, ref_strand);
 	}
 }
 
@@ -57,7 +65,8 @@ void Breakpoint_Tree::insert(breakpoint_node *&tree, std::string chr, int positi
 	if (tree == NULL) {
 		tree = new breakpoint_node;
 		tree->position = position;
-		tree->ref_support = 0;
+		tree->ref_support.first = 0;
+		tree->ref_support.second = 0;
 		tree->chr = chr;
 		tree->direction = direction;
 		tree->left = NULL;
@@ -70,19 +79,23 @@ void Breakpoint_Tree::insert(breakpoint_node *&tree, std::string chr, int positi
 		//std::cerr << "Element exists!" << std::endl;
 		//TODO we should use this information to assess the reliability of this call!
 	} else {
-		insert(tree->left, chr, position,position); //think about that!
+		insert(tree->left, chr, position, position); //think about that!
 	}
 }
 
-int Breakpoint_Tree::get_ref(breakpoint_node *&tree, std::string chr, int position) {
+std::pair<int,int> Breakpoint_Tree::get_ref(breakpoint_node *&tree, std::string chr, int position ) {
 	if (tree == NULL) {
-		return -1;
+		std::pair<int,int> tmp;
+		tmp.first=-1;
+		tmp.second=-1;
+		return tmp;
 	}
 	if (tree->position > position) {
 		return get_ref(tree->left, chr, position);
 	} else if (tree->position < position) {
 		return get_ref(tree->right, chr, position);
 	} else if (strcmp(chr.c_str(), tree->chr.c_str()) == 0) { // found element
+
 		return tree->ref_support;
 	} else {
 		return get_ref(tree->left, chr, position); //just in case.
@@ -231,7 +244,7 @@ void Breakpoint_Tree::inorder(breakpoint_node *ptr) {
 	}
 	if (ptr != NULL) {
 		inorder(ptr->left);
-		std::cout << ptr->chr << " " << ptr->position << "  " << ptr->ref_support << std::endl;
+		std::cout << ptr->chr << " " << ptr->position << "  " << ptr->ref_support.first <<" "<< ptr->ref_support.second<< std::endl;
 		inorder(ptr->right);
 	}
 }


=====================================
src/tree/Breakpoint_Tree.h
=====================================
@@ -16,7 +16,7 @@ struct breakpoint_node {
 	std::string chr;
 	int position; // value to store!
 	bool direction;
-	int ref_support;
+	std::pair<int,int> ref_support;
 	breakpoint_node *left;
 	breakpoint_node *right;
 };
@@ -40,8 +40,8 @@ public:
 	void postorder(breakpoint_node *ptr);
 	void display(breakpoint_node *ptr, int);
 	void get_nodes(breakpoint_node *ptr, std::vector<int> & nodes);
-	void overalps(int start,int stop,std::string chr, breakpoint_node *par);
-	int get_ref(breakpoint_node *&tree, std::string chr, int position);
+	void overalps(int start,int stop,std::string chr, breakpoint_node *par,bool ref_strand);
+	std::pair<int,int> get_ref(breakpoint_node *&tree, std::string chr, int position);
 };
 
 


=====================================
src/tree/IntervallTree.cpp
=====================================
@@ -12,6 +12,7 @@ void IntervallTree::careful_screening(Breakpoint *& new_break, TNode *p) { //may
 		careful_screening(new_break, p->left);
 		if (p->get_data()->overlap(new_break) == 0) { //SV type
 			p->get_data()->add_read(new_break);
+			//cout<<"Merged: "<<endl;
 			new_break->set_coordinates(-1, -1);
 			return;
 		}
@@ -32,6 +33,7 @@ void IntervallTree::insert(Breakpoint * new_break, TNode *&p) {
 	} else { // find on tree:
 		long score = p->get_data()->overlap(new_break); //comparison function
 		if (score == 0) { //add SV types?
+			//cout<<"Merged"<<endl;
 			p->get_data()->add_read(new_break);
 			new_break->set_coordinates(-1, -1);
 			//delete new_break;


=====================================
test_set/README
=====================================
@@ -1,10 +1,11 @@
 This is a test set to confirm the correct installation of Sniffles.
 Run Sniffles with default parameters:
 ./sniffles -m reads_region.bam -v test.vcf
+this should take no longer than 1 second. 
 
 You should expect to find one deletion at 21:21492142-21492648. You can also compare to the output provided in expected_output.vcf.
 
-If you have questions or concern please submit an issue on github: 
+If you have questions or concern, please submit an issue on github: 
 https://github.com/fritzsedlazeck/Sniffles/issues
 
 or drop me an email:


=====================================
test_set/test.vcf
=====================================
@@ -0,0 +1,118 @@
+##fileformat=VCFv4.2
+##source=Sniffles
+##fileDate=20180307
+##contig=<ID=1,length=249250621>
+##contig=<ID=2,length=243199373>
+##contig=<ID=3,length=198022430>
+##contig=<ID=4,length=191154276>
+##contig=<ID=5,length=180915260>
+##contig=<ID=6,length=171115067>
+##contig=<ID=7,length=159138663>
+##contig=<ID=8,length=146364022>
+##contig=<ID=9,length=141213431>
+##contig=<ID=10,length=135534747>
+##contig=<ID=11,length=135006516>
+##contig=<ID=12,length=133851895>
+##contig=<ID=13,length=115169878>
+##contig=<ID=14,length=107349540>
+##contig=<ID=15,length=102531392>
+##contig=<ID=16,length=90354753>
+##contig=<ID=17,length=81195210>
+##contig=<ID=18,length=78077248>
+##contig=<ID=19,length=59128983>
+##contig=<ID=20,length=63025520>
+##contig=<ID=21,length=48129895>
+##contig=<ID=22,length=51304566>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##contig=<ID=MT,length=16569>
+##contig=<ID=GL000207.1,length=4262>
+##contig=<ID=GL000226.1,length=15008>
+##contig=<ID=GL000229.1,length=19913>
+##contig=<ID=GL000231.1,length=27386>
+##contig=<ID=GL000210.1,length=27682>
+##contig=<ID=GL000239.1,length=33824>
+##contig=<ID=GL000235.1,length=34474>
+##contig=<ID=GL000201.1,length=36148>
+##contig=<ID=GL000247.1,length=36422>
+##contig=<ID=GL000245.1,length=36651>
+##contig=<ID=GL000197.1,length=37175>
+##contig=<ID=GL000203.1,length=37498>
+##contig=<ID=GL000246.1,length=38154>
+##contig=<ID=GL000249.1,length=38502>
+##contig=<ID=GL000196.1,length=38914>
+##contig=<ID=GL000248.1,length=39786>
+##contig=<ID=GL000244.1,length=39929>
+##contig=<ID=GL000238.1,length=39939>
+##contig=<ID=GL000202.1,length=40103>
+##contig=<ID=GL000234.1,length=40531>
+##contig=<ID=GL000232.1,length=40652>
+##contig=<ID=GL000206.1,length=41001>
+##contig=<ID=GL000240.1,length=41933>
+##contig=<ID=GL000236.1,length=41934>
+##contig=<ID=GL000241.1,length=42152>
+##contig=<ID=GL000243.1,length=43341>
+##contig=<ID=GL000242.1,length=43523>
+##contig=<ID=GL000230.1,length=43691>
+##contig=<ID=GL000237.1,length=45867>
+##contig=<ID=GL000233.1,length=45941>
+##contig=<ID=GL000204.1,length=81310>
+##contig=<ID=GL000198.1,length=90085>
+##contig=<ID=GL000208.1,length=92689>
+##contig=<ID=GL000191.1,length=106433>
+##contig=<ID=GL000227.1,length=128374>
+##contig=<ID=GL000228.1,length=129120>
+##contig=<ID=GL000214.1,length=137718>
+##contig=<ID=GL000221.1,length=155397>
+##contig=<ID=GL000209.1,length=159169>
+##contig=<ID=GL000218.1,length=161147>
+##contig=<ID=GL000220.1,length=161802>
+##contig=<ID=GL000213.1,length=164239>
+##contig=<ID=GL000211.1,length=166566>
+##contig=<ID=GL000199.1,length=169874>
+##contig=<ID=GL000217.1,length=172149>
+##contig=<ID=GL000216.1,length=172294>
+##contig=<ID=GL000215.1,length=172545>
+##contig=<ID=GL000205.1,length=174588>
+##contig=<ID=GL000219.1,length=179198>
+##contig=<ID=GL000224.1,length=179693>
+##contig=<ID=GL000223.1,length=180455>
+##contig=<ID=GL000195.1,length=182896>
+##contig=<ID=GL000212.1,length=186858>
+##contig=<ID=GL000222.1,length=186861>
+##contig=<ID=GL000200.1,length=187035>
+##contig=<ID=GL000193.1,length=189789>
+##contig=<ID=GL000194.1,length=191469>
+##contig=<ID=GL000225.1,length=211173>
+##contig=<ID=GL000192.1,length=547496>
+##contig=<ID=NC_007605,length=171823>
+##contig=<ID=hs37d5,length=35477943>
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##ALT=<ID=INS,Description="Insertion">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=STD_quant_start,Number=A,Type=Integer,Description="STD of the start breakpoints across the reads.">
+##INFO=<ID=STD_quant_stop,Number=A,Type=Integer,Description="STD of the stop breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Integer,Description="Kurtosis value of the start breakpoints accross the reads.">
+##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Integer,Description="Kurtosis value of the stop breakpoints accross the reads.">
+##INFO=<ID=SUPTYPE,Number=1,Type=String,Description="Type by which the variant is supported.(SR,ALN)">
+##INFO=<ID=SUPTYPE,Number=1,Type=String,Description="Type by which the variant is supported.(SR,ALN)">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
+##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	reads_region.bam
+21	21492142	0	N	<DEL>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.8;CHR2=21;END=21492648;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=506;STRANDS=+-;RE=48	GT:DR:DV	./.:.:48



View it on GitLab: https://salsa.debian.org/med-team/sniffles/commit/7418012d740c7dc303df9ab996302bc365220467

-- 
View it on GitLab: https://salsa.debian.org/med-team/sniffles/commit/7418012d740c7dc303df9ab996302bc365220467
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181015/999cbe25/attachment-0001.html>


More information about the debian-med-commit mailing list