[med-svn] [Git][med-team/sniffles][upstream] New upstream version 1.0.11+ds
Andreas Tille
gitlab at salsa.debian.org
Fri Feb 1 14:32:16 GMT 2019
Andreas Tille pushed to branch upstream at Debian Med / sniffles
Commits:
4cb082cc by Andreas Tille at 2019-02-01T14:28:30Z
New upstream version 1.0.11+ds
- - - - -
5 changed files:
- CMakeLists.txt
- src/Alignment.cpp
- src/Paramer.h
- src/Sniffles.cpp
- src/print/VCFPrinter.cpp
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -7,9 +7,9 @@ option(STATIC "Build static binary" OFF)
set( SNIF_VERSION_MINOR 0 )
IF(CMAKE_BUILD_TYPE STREQUAL "Debug")
message(STATUS "Building in debug mode!")
- set( SNIF_VERSION_BUILD 10-debug )
+ set( SNIF_VERSION_BUILD 11-debug )
else()
- set( SNIF_VERSION_BUILD 10 )
+ set( SNIF_VERSION_BUILD 11 )
ENDIF()
=====================================
src/Alignment.cpp
=====================================
@@ -61,6 +61,7 @@ void add_event(int pos, size_t & i, vector<differences_str> & events) {
*/
vector<differences_str> Alignment::summarize_csstring(std::vector<indel_str> &dels) {
+
string cs = this->get_cs();
int pos = this->getPosition();
int corr = 0;
@@ -137,8 +138,11 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
read_pos += al->CigarData[0].Length;
}
+ int sum_mis = 0;
+ int sum_events=0;
+ int sum_single=0;
for (size_t i = 0; i < al->CigarData.size(); i++) {
- if (al->CigarData[i].Type == 'M') {
+ if (al->CigarData[i].Type == 'M' || (al->CigarData[i].Type == '=' || al->CigarData[i].Type == 'X')) {
pos += al->CigarData[i].Length;
read_pos += al->CigarData[i].Length;
} else if (al->CigarData[i].Type == 'D') {
@@ -146,6 +150,11 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
ev.type = al->CigarData[i].Length; //deletion
ev.readposition = read_pos;
ev.resolved = true;
+ if (al->CigarData[i].Length >2) {
+ sum_events++;
+ }else{
+ sum_single++;
+ }
events.push_back(ev);
pos += al->CigarData[i].Length;
} else if (al->CigarData[i].Type == 'I') {
@@ -153,13 +162,19 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
ev.resolved = true;
ev.readposition = read_pos;
ev.type = al->CigarData[i].Length * -1; //insertion
+ if (al->CigarData[i].Length >2) {
+ sum_events++;
+ }else{
+ sum_single++;
+ }
+
events.push_back(ev);
read_pos += al->CigarData[i].Length;
} else if (al->CigarData[i].Type == 'N') {
pos += al->CigarData[i].Length;
ev.resolved = true;
read_pos += al->CigarData[i].Length;
- } else if (al->CigarData[i].Type == 'S' && al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
+ } else if ((al->CigarData[i].Type == 'S' || al->CigarData[i].Type == 'H' )&& al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
string sa;
al->GetTag("SA", sa);
uint32_t sv;
@@ -217,6 +232,7 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
}
ref_pos++;
}
+ sum_mis++;
//store in sorted order:
add_event(pos, pos_events, events);
pos++; //just the pos on ref!
@@ -240,6 +256,12 @@ vector<differences_str> Alignment::summarizeAlignment(std::vector<indel_str> &de
}
}
+ //if (flag) {
+ // std::cout << this->getName() << " " << (double) sum_mis << " " << (double) sum_events<<" "<<sum_single <<" "<< (double) sum_mis / (double) (sum_single+sum_mis)<< endl;
+ //}
+ //if (Parameter::Instance()->ccs_reads && (double) sum_mis / (double) (sum_events+sum_mis) > 0.95) {
+ // events.clear();
+ //}
// Parameter::Instance()->meassure_time(comp_aln, "\t\tMD string: ");
return events;
@@ -693,7 +715,7 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
uint32_t sv;
al->GetTag("SV", sv);
tmp.cross_N = ((sv & Ns_CLIPPED));
- bool flag = strcmp(getName().c_str(),"0bac61ef-7819-462b-ae3d-32c68fe580c0")==0; //Parameter::Instance()->read_name.c_str()) == 0;
+ bool flag = strcmp(getName().c_str(), "0bac61ef-7819-462b-ae3d-32c68fe580c0") == 0; //Parameter::Instance()->read_name.c_str()) == 0;
get_coords(tmp, tmp.read_pos_start, tmp.read_pos_stop);
if (flag) {
@@ -926,7 +948,7 @@ std::string Alignment::get_md() {
return md;
} else {
std::cerr << "No MD string detected! Check bam file! Otherwise generate using e.g. samtools." << std::endl;
- cout<<"MD: TEST" << this->getName()<<endl;
+ cout << "MD: TEST" << this->getName() << endl;
exit(0);
}
return md;
@@ -1117,10 +1139,10 @@ vector<str_event> Alignment::get_events_Aln() {
//clock_t comp_aln = clock();
std::vector<indel_str> dels;
vector<differences_str> event_aln;
- /* if (Parameter::Instance()->cs_string) {
- cout<<"run cs check "<<std::endl;
- event_aln = summarize_csstring(dels);
- } else {*/
+// if (Parameter::Instance()->cs_string) {/
+// cout << "run cs check " << std::endl;
+// event_aln = summarize_csstring(dels);
+// } else {
event_aln = summarizeAlignment(dels);
// }
//double time2 = Parameter::Instance()->meassure_time(comp_aln, "\tcompAln Events: ");
=====================================
src/Paramer.h
=====================================
@@ -25,7 +25,7 @@ class Parameter {
private:
Parameter() {
window_thresh=10;//TODO check!
- version="1.0.10";
+ version="1.0.11";
huge_ins = 999;//TODO check??
}
~Parameter() {
@@ -85,6 +85,7 @@ public:
bool skip_parameter_estimation;
bool cs_string;
bool read_strand;
+ bool ccs_reads;
void set_regions(std::string reg) {
size_t i = 0;
=====================================
src/Sniffles.cpp
=====================================
@@ -95,6 +95,9 @@ void read_parameters(int argc, char *argv[]) {
TCLAP::SwitchArg arg_cs_string("", "cs_string", "Enables the scan of CS string instead of Cigar and MD. ", cmd, false);
TCLAP::SwitchArg arg_read_strand("", "report_read_strands", "Enables the report of the strand categories per read. (Beta) ", cmd, false);
+ TCLAP::SwitchArg arg_ccs("", "ccs_reads", "Preset CCS Pacbio setting. (Beta) ", cmd, false);
+
+
TCLAP::ValueArg<float> arg_allelefreq("f", "allelefreq", "Threshold on allele frequency (0-1). ", false, 0.0, "float", cmd);
TCLAP::ValueArg<float> arg_hetfreq("", "min_het_af", "Threshold on allele frequency (0-1). ", false, 0.3, "float", cmd);
TCLAP::ValueArg<float> arg_homofreq("", "min_homo_af", "Threshold on allele frequency (0-1). ", false, 0.8, "float", cmd);
@@ -143,6 +146,7 @@ void read_parameters(int argc, char *argv[]) {
printParameter(usage, arg_seq);
printParameter(usage, arg_std);
printParameter(usage,arg_read_strand);
+ printParameter(usage,arg_ccs);
usage << "" << std::endl;
usage << "Parameter estimation:" << std::endl;
@@ -179,7 +183,7 @@ void read_parameters(int argc, char *argv[]) {
Parameter::Instance()->change_coords = arg_coords.getValue();
Parameter::Instance()->debug = true;
Parameter::Instance()->score_treshold = 10;
- Parameter::Instance()->read_name = "0bac61ef-7819-462b-ae3d-32c68fe580c0"; //21_16296949_+";//21_40181680_-";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
+ Parameter::Instance()->read_name = " ";//m54238_180925_225123/56099701/ccs";//m54238_180926_231301/43516780/ccs"; //21_16296949_+";//21_40181680_-";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
Parameter::Instance()->bam_files.push_back(arg_bamfile.getValue());
Parameter::Instance()->min_mq = arg_mq.getValue();
Parameter::Instance()->output_vcf = arg_vcf.getValue();
@@ -198,7 +202,7 @@ void read_parameters(int argc, char *argv[]) {
Parameter::Instance()->min_segment_size = arg_segsize.getValue();
Parameter::Instance()->reportBND = arg_bnd.getValue();
Parameter::Instance()->input_vcf = arg_input_vcf.getValue();
- Parameter::Instance()->print_seq = arg_seq.getValue();
+ Parameter::Instance()->print_seq = true;//arg_seq.getValue();
Parameter::Instance()->ignore_std = arg_std.getValue();
Parameter::Instance()->min_zmw = arg_zmw.getValue();
Parameter::Instance()->homfreq = arg_homofreq.getValue();
@@ -206,7 +210,13 @@ void read_parameters(int argc, char *argv[]) {
Parameter::Instance()->skip_parameter_estimation = arg_parameter.getValue();
Parameter::Instance()->cs_string = arg_cs_string.getValue();
Parameter::Instance()->read_strand=arg_read_strand.getValue();
+ Parameter::Instance()->ccs_reads=arg_ccs.getValue();
+
+ if(Parameter::Instance()->ccs_reads){
+ Parameter::Instance()->skip_parameter_estimation=true;
+ Parameter::Instance()->ignore_std=false;
+ }
if (Parameter::Instance()->skip_parameter_estimation) {
cout<<"\tSkip parameter estimation."<<endl;
Parameter::Instance()->score_treshold = 2;
@@ -216,6 +226,7 @@ void read_parameters(int argc, char *argv[]) {
Parameter::Instance()->avg_ins = arg_insratio.getValue();
}
+
//Parse IDS:
/*std::string buffer = arg_chrs.getValue();
int count = 0;
=====================================
src/print/VCFPrinter.cpp
=====================================
@@ -8,7 +8,7 @@
#include "VCFPrinter.h"
void VCFPrinter::print_header() {
- fprintf(file, "%s", "##fileformat=VCFv4.3\n");
+ fprintf(file, "%s", "##fileformat=VCFv4.1\n");
fprintf(file, "%s", "##source=Sniffles\n");
string time = currentDateTime();
fprintf(file, "%s", "##fileDate=");
@@ -43,7 +43,7 @@ void VCFPrinter::print_header() {
fprintf(file, "%s", "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">\n");
fprintf(file, "%s", "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n");
if (Parameter::Instance()->report_n_reads > 0 || Parameter::Instance()->report_n_reads == -1) {
- fprintf(file, "%s", "##INFO=<ID=RNAMES,Number=1,Type=String,Description=\"Names of reads supporting SVs (comma separated)\">\n");
+ fprintf(file, "%s", "##INFO=<ID=RNAMES,Number=.,Type=String,Description=\"Names of reads supporting SVs (comma separated)\">\n");
}
if (Parameter::Instance()->print_seq) {
fprintf(file, "%s", "##INFO=<ID=SEQ,Number=1,Type=String,Description=\"Extracted sequence from the best representative read.\">\n");
@@ -54,11 +54,11 @@ void VCFPrinter::print_header() {
fprintf(file, "%s", "##INFO=<ID=REF_strand,Number=2,Type=Integer,Description=\"plus strand ref, minus strand ref.\">\n");
}
- fprintf(file, "%s", "##INFO=<ID=STD_quant_start,Number=A,Type=Integer,Description=\"STD of the start breakpoints across the reads.\">\n");
- fprintf(file, "%s", "##INFO=<ID=STD_quant_stop,Number=A,Type=Integer,Description=\"STD of the stop breakpoints across the reads.\">\n");
- fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Integer,Description=\"Kurtosis value of the start breakpoints across the reads.\">\n");
- fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Integer,Description=\"Kurtosis value of the stop breakpoints across the reads.\">\n");
- fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"Type by which the variant is supported.(SR,ALN,NR)\">\n");
+ fprintf(file, "%s", "##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description=\"STD of the start breakpoints across the reads.\">\n");
+ fprintf(file, "%s", "##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description=\"STD of the stop breakpoints across the reads.\">\n");
+ fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description=\"Kurtosis value of the start breakpoints across the reads.\">\n");
+ fprintf(file, "%s", "##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description=\"Kurtosis value of the stop breakpoints across the reads.\">\n");
+ fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=A,Type=String,Description=\"Type by which the variant is supported.(SR,ALN,NR)\">\n");
fprintf(file, "%s", "##INFO=<ID=STRANDS,Number=A,Type=String,Description=\"Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)\">\n");
fprintf(file, "%s", "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency.\">\n");
fprintf(file, "%s", "##INFO=<ID=ZMW,Number=A,Type=Integer,Description=\"Number of ZMWs (Pacbio) supporting SV.\">\n");
@@ -106,10 +106,11 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
int end = IPrinter::calc_pos(end_coord, ref, chr);
std::string strands = SV->get_strand(1);
- fprintf(file, "%s", "\tN\t");
+
if (Parameter::Instance()->reportBND && (SV->get_SVtype() & TRA)) {
//N[22:36765684[ +-
//]21:10540232]N -+
+ fprintf(file, "%s", "\tN\t");
if (strands[0] == '-') { //&&
fprintf(file, "%s", "]");
fprintf(file, "%s", chr.c_str());
@@ -124,8 +125,22 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
fprintf(file, "%i", end);
fprintf(file, "%c", '[');
}
- } else {
+ } else if (!SV->get_sequence().empty() && ((SV->get_SVtype() & INS) || (SV->get_SVtype() & DEL))) {
+ fprintf(file, "%c", '\t');
+ if ((SV->get_SVtype() & DEL)) {
+ fprintf(file, "%s", SV->get_sequence().c_str());
+ } else {
+ fprintf(file, "%c", 'N');
+ }
+ fprintf(file, "%c", '\t');
+ if ((SV->get_SVtype() & INS)) {
+ fprintf(file, "%s", SV->get_sequence().c_str());
+ } else {
+ fprintf(file, "%c", 'N');
+ }
+ } else {
+ fprintf(file, "%s", "\tN\t");
fprintf(file, "%c", '<');
fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str());
fprintf(file, "%c", '>');
@@ -145,16 +160,19 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
fprintf(file, "%s", ";SVMETHOD=Snifflesv");
fprintf(file, "%s", Parameter::Instance()->version.c_str());
+
if (!(Parameter::Instance()->reportBND && (SV->get_SVtype() & TRA))) {
+
fprintf(file, "%s", ";CHR2=");
fprintf(file, "%s", chr.c_str());
fprintf(file, "%s", ";END=");
- //if (SV->get_SVtype() & INS) {
- // fprintf(file, "%i", std::max((int) (end - SV->get_length()), start));
- //} else {
- fprintf(file, "%i", end);
- //}
+ if (SV->get_SVtype() & INS) {
+ fprintf(file, "%i", std::max((int) end, start));
+ } else {
+
+ fprintf(file, "%i", end);
+ }
}
if (zmws != 0) {
fprintf(file, "%s", ";ZMW=");
@@ -186,11 +204,11 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
if (((SV->get_SVtype() & INS) && SV->get_length() == Parameter::Instance()->huge_ins) && SV->get_types().is_ALN) { //!
fprintf(file, "%i", 999999999);
- }else if (SV->get_SVtype() & TRA){
- fprintf(file, "%i",0);
- }else if (SV->get_SVtype() & DEL){
- fprintf(file, "%i", SV->get_length()*-1);
- }else {
+ } else if (SV->get_SVtype() & TRA) {
+ fprintf(file, "%i", 0);
+ } else if (SV->get_SVtype() & DEL) {
+ fprintf(file, "%i", SV->get_length() * -1);
+ } else {
fprintf(file, "%i", SV->get_length());
}
// }
@@ -226,10 +244,10 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
fprintf(file, "%i", tmp_stop.second);
}
- if (Parameter::Instance()->print_seq && !SV->get_sequence().empty()) {
- fprintf(file, "%s", ";SEQ=");
- fprintf(file, "%s", SV->get_sequence().c_str());
- }
+ // if (Parameter::Instance()->print_seq && !SV->get_sequence().empty()) {
+ // fprintf(file, "%s", ";SEQ=");
+ // fprintf(file, "%s", SV->get_sequence().c_str());
+ // }
fprintf(file, "%s", ";RE=");
fprintf(file, "%i", SV->get_support());
//if(Parameter::Instance()->genotype){
View it on GitLab: https://salsa.debian.org/med-team/sniffles/commit/4cb082cc94b887ca4a4226c7af22a26b3e5f5302
--
View it on GitLab: https://salsa.debian.org/med-team/sniffles/commit/4cb082cc94b887ca4a4226c7af22a26b3e5f5302
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190201/80648e75/attachment-0001.html>
More information about the debian-med-commit
mailing list