[med-svn] [Git][med-team/sniffles][upstream] New upstream version 1.0.11+ds

Andreas Tille gitlab at salsa.debian.org
Fri Feb 1 14:32:16 GMT 2019


Andreas Tille pushed to branch upstream at Debian Med / sniffles


Commits:
4cb082cc by Andreas Tille at 2019-02-01T14:28:30Z
New upstream version 1.0.11+ds
- - - - -


5 changed files:

- CMakeLists.txt
- src/Alignment.cpp
- src/Paramer.h
- src/Sniffles.cpp
- src/print/VCFPrinter.cpp


Changes:

=====================================
CMakeLists.txt
=====================================
@@ -7,9 +7,9 @@ option(STATIC "Build static binary" OFF)
  set( SNIF_VERSION_MINOR 0 )
 IF(CMAKE_BUILD_TYPE STREQUAL "Debug")
     message(STATUS "Building in debug mode!")
-    set( SNIF_VERSION_BUILD 10-debug )
+    set( SNIF_VERSION_BUILD 11-debug )
 else()
-	set( SNIF_VERSION_BUILD 10 )
+	set( SNIF_VERSION_BUILD 11 )
 ENDIF()
 
 


=====================================
src/Alignment.cpp
=====================================
@@ -61,6 +61,7 @@ void add_event(int pos, size_t & i, vector<differences_str> & events) {
  */
 
 vector<differences_str> Alignment::summarize_csstring(std::vector<indel_str> &dels) {
+
 	string cs = this->get_cs();
 	int pos = this->getPosition();
 	int corr = 0;
@@ -137,8 +138,11 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
 		read_pos += al->CigarData[0].Length;
 	}
 
+	int sum_mis = 0;
+	int sum_events=0;
+	int sum_single=0;
 	for (size_t i = 0; i < al->CigarData.size(); i++) {
-		if (al->CigarData[i].Type == 'M') {
+		if (al->CigarData[i].Type == 'M' || (al->CigarData[i].Type == '=' || al->CigarData[i].Type == 'X')) {
 			pos += al->CigarData[i].Length;
 			read_pos += al->CigarData[i].Length;
 		} else if (al->CigarData[i].Type == 'D') {
@@ -146,6 +150,11 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
 			ev.type = al->CigarData[i].Length; //deletion
 			ev.readposition = read_pos;
 			ev.resolved = true;
+			if (al->CigarData[i].Length >2) {
+				sum_events++;
+			}else{
+				sum_single++;
+			}
 			events.push_back(ev);
 			pos += al->CigarData[i].Length;
 		} else if (al->CigarData[i].Type == 'I') {
@@ -153,13 +162,19 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
 			ev.resolved = true;
 			ev.readposition = read_pos;
 			ev.type = al->CigarData[i].Length * -1; //insertion
+			if (al->CigarData[i].Length >2) {
+				sum_events++;
+			}else{
+				sum_single++;
+			}
+
 			events.push_back(ev);
 			read_pos += al->CigarData[i].Length;
 		} else if (al->CigarData[i].Type == 'N') {
 			pos += al->CigarData[i].Length;
 			ev.resolved = true;
 			read_pos += al->CigarData[i].Length;
-		} else if (al->CigarData[i].Type == 'S' && al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
+		} else if ((al->CigarData[i].Type == 'S' ||  al->CigarData[i].Type == 'H' )&& al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
 			string sa;
 			al->GetTag("SA", sa);
 			uint32_t sv;
@@ -217,6 +232,7 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
 					}
 					ref_pos++;
 				}
+				sum_mis++;
 				//store in sorted order:
 				add_event(pos, pos_events, events);
 				pos++;			//just the pos on ref!
@@ -240,6 +256,12 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
 		}
 	}
 
+	//if (flag) {
+	//	std::cout << this->getName() << " " << (double) sum_mis << " " << (double) sum_events<<" "<<sum_single <<" "<< (double) sum_mis / (double) (sum_single+sum_mis)<< endl;
+	//}
+	//if (Parameter::Instance()->ccs_reads && (double) sum_mis / (double) (sum_events+sum_mis) > 0.95) {
+	//	events.clear();
+	//}
 //	Parameter::Instance()->meassure_time(comp_aln, "\t\tMD string: ");
 
 	return events;
@@ -693,7 +715,7 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
 		uint32_t sv;
 		al->GetTag("SV", sv);
 		tmp.cross_N = ((sv & Ns_CLIPPED));
-		bool flag = strcmp(getName().c_str(),"0bac61ef-7819-462b-ae3d-32c68fe580c0")==0; //Parameter::Instance()->read_name.c_str()) == 0;
+		bool flag = strcmp(getName().c_str(), "0bac61ef-7819-462b-ae3d-32c68fe580c0") == 0; //Parameter::Instance()->read_name.c_str()) == 0;
 
 		get_coords(tmp, tmp.read_pos_start, tmp.read_pos_stop);
 		if (flag) {
@@ -926,7 +948,7 @@ std::string Alignment::get_md() {
 		return md;
 	} else {
 		std::cerr << "No MD string detected! Check bam file! Otherwise generate using e.g. samtools." << std::endl;
-		cout<<"MD: TEST" << this->getName()<<endl;
+		cout << "MD: TEST" << this->getName() << endl;
 		exit(0);
 	}
 	return md;
@@ -1117,10 +1139,10 @@ vector<str_event> Alignment::get_events_Aln() {
 //clock_t comp_aln = clock();
 	std::vector<indel_str> dels;
 	vector<differences_str> event_aln;
-	/*	if (Parameter::Instance()->cs_string) {
-	 cout<<"run cs check "<<std::endl;
-	 event_aln = summarize_csstring(dels);
-	 } else {*/
+//	if (Parameter::Instance()->cs_string) {/
+//		cout << "run cs check " << std::endl;
+//		event_aln = summarize_csstring(dels);
+//	} else {
 	event_aln = summarizeAlignment(dels);
 //	}
 //double time2 = Parameter::Instance()->meassure_time(comp_aln, "\tcompAln Events: ");


=====================================
src/Paramer.h
=====================================
@@ -25,7 +25,7 @@ class Parameter {
 private:
 	Parameter() {
 		window_thresh=10;//TODO check!
-		version="1.0.10";
+		version="1.0.11";
 		huge_ins = 999;//TODO check??
 	}
 	~Parameter() {
@@ -85,6 +85,7 @@ public:
 	bool skip_parameter_estimation;
 	bool cs_string;
 	bool read_strand;
+	bool ccs_reads;
 
 	void set_regions(std::string reg) {
 		size_t i = 0;


=====================================
src/Sniffles.cpp
=====================================
@@ -95,6 +95,9 @@ void read_parameters(int argc, char *argv[]) {
 	TCLAP::SwitchArg arg_cs_string("", "cs_string", "Enables the scan of CS string instead of Cigar and MD. ", cmd, false);
 	TCLAP::SwitchArg arg_read_strand("", "report_read_strands", "Enables the report of the strand categories per read. (Beta) ", cmd, false);
 
+	TCLAP::SwitchArg arg_ccs("", "ccs_reads", "Preset CCS Pacbio setting. (Beta) ", cmd, false);
+
+
 	TCLAP::ValueArg<float> arg_allelefreq("f", "allelefreq", "Threshold on allele frequency (0-1). ", false, 0.0, "float", cmd);
 	TCLAP::ValueArg<float> arg_hetfreq("", "min_het_af", "Threshold on allele frequency (0-1). ", false, 0.3, "float", cmd);
 	TCLAP::ValueArg<float> arg_homofreq("", "min_homo_af", "Threshold on allele frequency (0-1). ", false, 0.8, "float", cmd);
@@ -143,6 +146,7 @@ void read_parameters(int argc, char *argv[]) {
 	printParameter(usage, arg_seq);
 	printParameter(usage, arg_std);
 	printParameter(usage,arg_read_strand);
+	printParameter(usage,arg_ccs);
 
 	usage << "" << std::endl;
 	usage << "Parameter estimation:" << std::endl;
@@ -179,7 +183,7 @@ void read_parameters(int argc, char *argv[]) {
 	Parameter::Instance()->change_coords = arg_coords.getValue();
 	Parameter::Instance()->debug = true;
 	Parameter::Instance()->score_treshold = 10;
-	Parameter::Instance()->read_name = "0bac61ef-7819-462b-ae3d-32c68fe580c0"; //21_16296949_+";//21_40181680_-";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
+	Parameter::Instance()->read_name = " ";//m54238_180925_225123/56099701/ccs";//m54238_180926_231301/43516780/ccs"; //21_16296949_+";//21_40181680_-";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
 	Parameter::Instance()->bam_files.push_back(arg_bamfile.getValue());
 	Parameter::Instance()->min_mq = arg_mq.getValue();
 	Parameter::Instance()->output_vcf = arg_vcf.getValue();
@@ -198,7 +202,7 @@ void read_parameters(int argc, char *argv[]) {
 	Parameter::Instance()->min_segment_size = arg_segsize.getValue();
 	Parameter::Instance()->reportBND = arg_bnd.getValue();
 	Parameter::Instance()->input_vcf = arg_input_vcf.getValue();
-	Parameter::Instance()->print_seq = arg_seq.getValue();
+	Parameter::Instance()->print_seq = true;//arg_seq.getValue();
 	Parameter::Instance()->ignore_std = arg_std.getValue();
 	Parameter::Instance()->min_zmw = arg_zmw.getValue();
 	Parameter::Instance()->homfreq = arg_homofreq.getValue();
@@ -206,7 +210,13 @@ void read_parameters(int argc, char *argv[]) {
 	Parameter::Instance()->skip_parameter_estimation = arg_parameter.getValue();
 	Parameter::Instance()->cs_string = arg_cs_string.getValue();
 	Parameter::Instance()->read_strand=arg_read_strand.getValue();
+	Parameter::Instance()->ccs_reads=arg_ccs.getValue();
+
+	if(Parameter::Instance()->ccs_reads){
+		Parameter::Instance()->skip_parameter_estimation=true;
+		Parameter::Instance()->ignore_std=false;
 
+	}
 	if (Parameter::Instance()->skip_parameter_estimation) {
 		cout<<"\tSkip parameter estimation."<<endl;
 		Parameter::Instance()->score_treshold = 2;
@@ -216,6 +226,7 @@ void read_parameters(int argc, char *argv[]) {
 		Parameter::Instance()->avg_ins = arg_insratio.getValue();
 	}
 
+
 	//Parse IDS:
 	/*std::string buffer = arg_chrs.getValue();
 	 int count = 0;


=====================================
src/print/VCFPrinter.cpp
=====================================
@@ -8,7 +8,7 @@
 #include "VCFPrinter.h"
 
 void VCFPrinter::print_header() {
-	fprintf(file, "%s", "##fileformat=VCFv4.3\n");
+	fprintf(file, "%s", "##fileformat=VCFv4.1\n");
 	fprintf(file, "%s", "##source=Sniffles\n");
 	string time = currentDateTime();
 	fprintf(file, "%s", "##fileDate=");
@@ -43,7 +43,7 @@ void VCFPrinter::print_header() {
 	fprintf(file, "%s", "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">\n");
 	fprintf(file, "%s", "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n");
 	if (Parameter::Instance()->report_n_reads > 0 || Parameter::Instance()->report_n_reads == -1) {
-		fprintf(file, "%s", "##INFO=<ID=RNAMES,Number=1,Type=String,Description=\"Names of reads supporting SVs (comma separated)\">\n");
+		fprintf(file, "%s", "##INFO=<ID=RNAMES,Number=.,Type=String,Description=\"Names of reads supporting SVs (comma separated)\">\n");
 	}
 	if (Parameter::Instance()->print_seq) {
 		fprintf(file, "%s", "##INFO=<ID=SEQ,Number=1,Type=String,Description=\"Extracted sequence from the best representative read.\">\n");
@@ -54,11 +54,11 @@ void VCFPrinter::print_header() {
 		fprintf(file, "%s", "##INFO=<ID=REF_strand,Number=2,Type=Integer,Description=\"plus strand ref, minus strand ref.\">\n");
 	}
 
-	fprintf(file, "%s", "##INFO=<ID=STD_quant_start,Number=A,Type=Integer,Description=\"STD of the start breakpoints across the reads.\">\n");
-	fprintf(file, "%s", "##INFO=<ID=STD_quant_stop,Number=A,Type=Integer,Description=\"STD of the stop breakpoints across the reads.\">\n");
-	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Integer,Description=\"Kurtosis value of the start breakpoints across the reads.\">\n");
-	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Integer,Description=\"Kurtosis value of the stop breakpoints across the reads.\">\n");
-	fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"Type by which the variant is supported.(SR,ALN,NR)\">\n");
+	fprintf(file, "%s", "##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description=\"STD of the start breakpoints across the reads.\">\n");
+	fprintf(file, "%s", "##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description=\"STD of the stop breakpoints across the reads.\">\n");
+	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description=\"Kurtosis value of the start breakpoints across the reads.\">\n");
+	fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description=\"Kurtosis value of the stop breakpoints across the reads.\">\n");
+	fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=A,Type=String,Description=\"Type by which the variant is supported.(SR,ALN,NR)\">\n");
 	fprintf(file, "%s", "##INFO=<ID=STRANDS,Number=A,Type=String,Description=\"Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)\">\n");
 	fprintf(file, "%s", "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency.\">\n");
 	fprintf(file, "%s", "##INFO=<ID=ZMW,Number=A,Type=Integer,Description=\"Number of ZMWs (Pacbio) supporting SV.\">\n");
@@ -106,10 +106,11 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 
 			int end = IPrinter::calc_pos(end_coord, ref, chr);
 			std::string strands = SV->get_strand(1);
-			fprintf(file, "%s", "\tN\t");
+
 			if (Parameter::Instance()->reportBND && (SV->get_SVtype() & TRA)) {
 				//N[22:36765684[ +-
 				//]21:10540232]N -+
+				fprintf(file, "%s", "\tN\t");
 				if (strands[0] == '-') { //&&
 					fprintf(file, "%s", "]");
 					fprintf(file, "%s", chr.c_str());
@@ -124,8 +125,22 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 					fprintf(file, "%i", end);
 					fprintf(file, "%c", '[');
 				}
-			} else {
+			} else if (!SV->get_sequence().empty() && ((SV->get_SVtype() & INS) || (SV->get_SVtype() & DEL))) {
+				fprintf(file, "%c", '\t');
+				if ((SV->get_SVtype() & DEL)) {
+					fprintf(file, "%s", SV->get_sequence().c_str());
+				} else {
+					fprintf(file, "%c", 'N');
+				}
+				fprintf(file, "%c", '\t');
+				if ((SV->get_SVtype() & INS)) {
+					fprintf(file, "%s", SV->get_sequence().c_str());
+				} else {
+					fprintf(file, "%c", 'N');
+				}
 
+			} else {
+				fprintf(file, "%s", "\tN\t");
 				fprintf(file, "%c", '<');
 				fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
 				fprintf(file, "%c", '>');
@@ -145,16 +160,19 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 
 			fprintf(file, "%s", ";SVMETHOD=Snifflesv");
 			fprintf(file, "%s", Parameter::Instance()->version.c_str());
+
 			if (!(Parameter::Instance()->reportBND && (SV->get_SVtype() & TRA))) {
+
 				fprintf(file, "%s", ";CHR2=");
 				fprintf(file, "%s", chr.c_str());
 				fprintf(file, "%s", ";END=");
 
-				//if (SV->get_SVtype() & INS) {
-				//	fprintf(file, "%i", std::max((int) (end - SV->get_length()), start));
-				//} else {
-				fprintf(file, "%i", end);
-				//}
+				if (SV->get_SVtype() & INS) {
+					fprintf(file, "%i", std::max((int) end, start));
+				} else {
+
+					fprintf(file, "%i", end);
+				}
 			}
 			if (zmws != 0) {
 				fprintf(file, "%s", ";ZMW=");
@@ -186,11 +204,11 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 
 			if (((SV->get_SVtype() & INS) && SV->get_length() == Parameter::Instance()->huge_ins) && SV->get_types().is_ALN) {				//!
 				fprintf(file, "%i", 999999999);
-			}else if (SV->get_SVtype() & TRA){
-				fprintf(file, "%i",0);
-			}else if (SV->get_SVtype() & DEL){
-				fprintf(file, "%i", SV->get_length()*-1);
-			}else {
+			} else if (SV->get_SVtype() & TRA) {
+				fprintf(file, "%i", 0);
+			} else if (SV->get_SVtype() & DEL) {
+				fprintf(file, "%i", SV->get_length() * -1);
+			} else {
 				fprintf(file, "%i", SV->get_length());
 			}
 			//	}
@@ -226,10 +244,10 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
 				fprintf(file, "%i", tmp_stop.second);
 			}
 
-			if (Parameter::Instance()->print_seq && !SV->get_sequence().empty()) {
-				fprintf(file, "%s", ";SEQ=");
-				fprintf(file, "%s", SV->get_sequence().c_str());
-			}
+			//	if (Parameter::Instance()->print_seq && !SV->get_sequence().empty()) {
+			//		fprintf(file, "%s", ";SEQ=");
+			//		fprintf(file, "%s", SV->get_sequence().c_str());
+			//	}
 			fprintf(file, "%s", ";RE=");
 			fprintf(file, "%i", SV->get_support());
 			//if(Parameter::Instance()->genotype){



View it on GitLab: https://salsa.debian.org/med-team/sniffles/commit/4cb082cc94b887ca4a4226c7af22a26b3e5f5302

-- 
View it on GitLab: https://salsa.debian.org/med-team/sniffles/commit/4cb082cc94b887ca4a4226c7af22a26b3e5f5302
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190201/80648e75/attachment-0001.html>


More information about the debian-med-commit mailing list