[med-svn] [sniffles] 01/06: Imported Upstream version 1.0.0+ds
Afif Elghraoui
afif at moszumanska.debian.org
Mon Oct 17 08:42:08 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository sniffles.
commit 5b80020ab41cf672d498c6e81e5ea5d21119d98a
Author: Afif Elghraoui <afif at debian.org>
Date: Fri Oct 14 21:08:23 2016 -0700
Imported Upstream version 1.0.0+ds
---
CMakeLists.txt | 10 +-
makefile | 62 ++
objects.mk | 8 +
sources.mk | 30 +
src/Alignment.cpp | 933 ++++++++++++++++++-----
src/Alignment.d | 54 ++
src/Alignment.h | 34 +-
src/Alignment.o | Bin 0 -> 628428 bytes
src/BamParser.cpp | 5 +-
src/BamParser.d | 58 ++
src/BamParser.o | Bin 0 -> 298480 bytes
src/CMakeLists.txt | 18 +-
src/Genotyper/Genotyper.cpp | 246 ++++++
src/Genotyper/Genotyper.h | 40 +
src/Paramer.h | 27 +-
src/Parser.cpp | 8 -
src/Parser.d | 56 ++
src/Parser.h | 8 +-
src/Parser.o | Bin 0 -> 8376 bytes
src/Sniffles.cpp | 125 ++-
src/Sniffles.d | 148 ++++
src/Sniffles.o | Bin 0 -> 812932 bytes
src/cluster/Cluster_SVs.cpp | 149 ++++
src/cluster/Cluster_SVs.h | 41 +
src/phasing/PhaserSV.cpp | 13 -
src/phasing/PhaserSV.h | 24 -
src/plane-sweep/Main.d | 1 +
src/plane-sweep/Main.o | Bin 0 -> 1380 bytes
src/plane-sweep/MyHeap.d | 62 ++
src/plane-sweep/MyHeap.o | Bin 0 -> 179100 bytes
src/plane-sweep/MyList.d | 62 ++
src/plane-sweep/MyList.o | Bin 0 -> 91880 bytes
src/plane-sweep/PlaneSweep.d | 67 ++
src/plane-sweep/PlaneSweep.o | Bin 0 -> 125776 bytes
src/plane-sweep/PlaneSweep_slim.cpp | 43 ++
src/plane-sweep/PlaneSweep_slim.h | 40 +
src/plane-sweep/subdir.mk | 33 +
src/print/BedePrinter.cpp | 59 --
src/print/{MariaPrinter.cpp => BedpePrinter.cpp} | 20 +-
src/print/{BedePrinter.h => BedpePrinter.h} | 12 +-
src/print/IPrinter.cpp | 46 +-
src/print/IPrinter.h | 29 +-
src/print/MariaPrinter.h | 24 -
src/print/VCFPrinter.cpp | 42 +-
src/sub/Breakpoint.cpp | 511 ++++++++-----
src/sub/Breakpoint.h | 35 +-
src/sub/Detect_Breakpoints.cpp | 453 +++++++----
src/sub/Detect_Breakpoints.h | 10 +-
src/sub/Detect_Breapoints.d | 82 ++
src/sub/Detect_Breapoints.o | Bin 0 -> 877496 bytes
src/sub/subdir.mk | 24 +
src/subdir.mk | 33 +
src/test | 0
src/tree/Breakpoint_Tree.cpp | 257 +++++++
src/tree/Breakpoint_Tree.h | 50 ++
src/tree/IntervallTree.cpp | 20 +-
src/tree/IntervallTree.d | 67 ++
src/tree/IntervallTree.h | 2 +-
src/tree/IntervallTree.o | Bin 0 -> 297288 bytes
src/tree/Intervall_bed.cpp | 2 +-
src/tree/Scapegoat.cpp | 10 -
src/tree/Scapegoat.d | 66 ++
src/tree/Scapegoat.h | 234 ------
src/tree/Scapegoat.o | Bin 0 -> 8688 bytes
src/tree/subdir.mk | 27 +
65 files changed, 3472 insertions(+), 1048 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 65d74f9..6d20302 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,17 +1,17 @@
cmake_minimum_required(VERSION 2.8)
project(Sniffles)
-set( NGM_VERSION_MAJOR 0 )
-set( NGM_VERSION_MINOR 1 )
+set( SNIF_VERSION_MAJOR 1 )
+set( SNIF_VERSION_MINOR 0 )
IF(CMAKE_BUILD_TYPE STREQUAL "Debug")
message(STATUS "Building in debug mode!")
- set( NGM_VERSION_BUILD 0-debug )
+ set( SNIF_VERSION_BUILD 0-debug )
else()
- set( NGM_VERSION_BUILD 0 )
+ set( SNIF_VERSION_BUILD 0 )
ENDIF()
-set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin/sniffles-${NGM_VERSION_MAJOR}.${NGM_VERSION_MINOR}.${NGM_VERSION_BUILD}/)
+set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin/sniffles-core-${SNIF_VERSION_MAJOR}.${SNIF_VERSION_MINOR}.${SNIF_VERSION_BUILD}/)
file(MAKE_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..73965cd
--- /dev/null
+++ b/makefile
@@ -0,0 +1,62 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+-include ../makefile.init
+
+RM := rm -rf
+
+# All of the sources participating in the build are defined here
+-include sources.mk
+-include src/tree/subdir.mk
+-include src/sub/subdir.mk
+-include src/plane-sweep/subdir.mk
+-include src/subdir.mk
+-include subdir.mk
+-include objects.mk
+
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(strip $(CC_DEPS)),)
+-include $(CC_DEPS)
+endif
+ifneq ($(strip $(C++_DEPS)),)
+-include $(C++_DEPS)
+endif
+ifneq ($(strip $(C_UPPER_DEPS)),)
+-include $(C_UPPER_DEPS)
+endif
+ifneq ($(strip $(CXX_DEPS)),)
+-include $(CXX_DEPS)
+endif
+ifneq ($(strip $(C_DEPS)),)
+-include $(C_DEPS)
+endif
+ifneq ($(strip $(CPP_DEPS)),)
+-include $(CPP_DEPS)
+endif
+endif
+
+-include ../makefile.defs
+
+# Add inputs and outputs from these tool invocations to the build variables
+
+# All Target
+all: Sniffles
+
+# Tool invocations
+Sniffles: $(OBJS) $(USER_OBJS)
+ @echo 'Building target: $@'
+ @echo 'Invoking: Cross G++ Linker'
+ g++ -L/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/lib -o "Sniffles" $(OBJS) $(USER_OBJS) $(LIBS)
+ @echo 'Finished building target: $@'
+ @echo ' '
+
+# Other Targets
+clean:
+ -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(OBJS)$(C_UPPER_DEPS)$(CXX_DEPS)$(C_DEPS)$(CPP_DEPS) Sniffles
+ - at echo ' '
+
+.PHONY: all clean dependents
+.SECONDARY:
+
+-include ../makefile.targets
diff --git a/objects.mk b/objects.mk
new file mode 100644
index 0000000..deb2291
--- /dev/null
+++ b/objects.mk
@@ -0,0 +1,8 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+USER_OBJS :=
+
+LIBS := -lbamtools
+
diff --git a/sources.mk b/sources.mk
new file mode 100644
index 0000000..2818222
--- /dev/null
+++ b/sources.mk
@@ -0,0 +1,30 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+C_UPPER_SRCS :=
+CXX_SRCS :=
+C++_SRCS :=
+OBJ_SRCS :=
+CC_SRCS :=
+ASM_SRCS :=
+C_SRCS :=
+CPP_SRCS :=
+O_SRCS :=
+S_UPPER_SRCS :=
+CC_DEPS :=
+C++_DEPS :=
+EXECUTABLES :=
+OBJS :=
+C_UPPER_DEPS :=
+CXX_DEPS :=
+C_DEPS :=
+CPP_DEPS :=
+
+# Every subdirectory with source files must be described here
+SUBDIRS := \
+src/tree \
+src/sub \
+src/plane-sweep \
+src \
+
diff --git a/src/Alignment.cpp b/src/Alignment.cpp
index d7b28d2..8e79e89 100644
--- a/src/Alignment.cpp
+++ b/src/Alignment.cpp
@@ -15,74 +15,286 @@ void Alignment::initAlignment() {
}
void Alignment::setAlignment(BamAlignment * align) {
al = align;
- alignment.first.clear();
- alignment.second.clear();
- is_computed = false;
+ /*
+ //comment from here:
+ //todo: why does this influence the INV detection???
+ alignment.first.clear();
+ alignment.second.clear();
+ is_computed = false;
- orig_length = al->QueryBases.size();
- for (size_t i = 0; i < al->QueryBases.size(); i++) {
- alignment.first += toupper(al->QueryBases[i]);
+ orig_length = al->QueryBases.size();
+
+ for (size_t i = 0; i < al->QueryBases.size(); i++) {
+ alignment.first += toupper(al->QueryBases[i]);
+ alignment.second += 'X';
+ }
+
+
+ stop = this->getPosition() + this->getRefLength();*/
+}
+
+void update_aln(std::string & alignment, int & i, int pos_to_modify) {
+ int ref_pos = 0;
+ while (i < alignment.size() && ref_pos != pos_to_modify) {
+ if (alignment[i] != '-') {
+ ref_pos++;
+ }
+ i++;
+ }
+ alignment[i] = 'Y';
+}
+
+void add_event(int pos, list<differences_str>::iterator & i, list<differences_str> & events) {
+ //insert sorted into vector:
+ while (i != events.end() && pos > (*i).position) {
+ i++;
+ }
+ differences_str ev;
+ ev.position = pos;
+ ev.type = 0; //mismatch
+ events.insert(i, ev);
+}
+
+void add_event(int pos, size_t & i, vector<differences_str> & events) {
+ //insert sorted into vector:
+ while (i < events.size() && pos > events[i].position) {
+ i++;
+ }
+ differences_str ev;
+ ev.position = pos;
+ ev.type = 0; //mismatch
+ events.insert(events.begin() + i, ev);
+}
+//todo: check if list changes things
+
+vector<differences_str> Alignment::summarizeAlignment() {
+ //clock_t comp_aln = clock();
+ vector<differences_str> events;
+ int pos = this->getPosition();
+ differences_str ev;
+ bool flag = false; // (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
+
+ for (size_t i = 0; i < al->CigarData.size(); i++) {
+ if (al->CigarData[i].Type == 'D') {
+ ev.position = pos;
+ ev.type = al->CigarData[i].Length; //deletion
+ events.push_back(ev);
+ pos += al->CigarData[i].Length;
+ } else if (al->CigarData[i].Type == 'I') {
+ ev.position = pos;
+ ev.type = al->CigarData[i].Length * -1; //insertion
+ events.push_back(ev);
+ } else if (al->CigarData[i].Type == 'M') {
+ pos += al->CigarData[i].Length;
+ } else if (al->CigarData[i].Type == 'N') {
+ pos += al->CigarData[i].Length;
+ } else if (al->CigarData[i].Type == 'S' && al->CigarData[i].Length > 4000) {
+ string sa;
+ al->GetTag("SA", sa);
+ if (sa.empty()) { // == no split reads!
+ if (flag) {
+ std::cout << "Chop: " << pos << " Rname: " << this->getName() << std::endl;
+ }
+ if (pos == this->getPosition()) {
+ ev.position = pos - Parameter::Instance()->huge_ins;
+ } else {
+ ev.position = pos;
+ }
+ ev.type = Parameter::Instance()->huge_ins * -1; //insertion: WE have to fix the length since we cannot estimate it!]
+ events.push_back(ev);
+ }
+ }
+ }
+
+ if (flag) {
+ for (size_t i = 0; i < events.size(); i++) {
+ if (abs(events[i].type) > 3000) {
+ cout << events[i].position << " " << events[i].type << endl;
+ }
+ }
+ cout << endl;
+ }
+
+ //set ref length requ. later on:
+ this->ref_len = pos - getPosition(); //TODO compare to get_length!
+
+ //cout<<" comp len: "<<this->ref_len<<" "<<pos<<" "<<this->getPosition()<<endl;
+// Parameter::Instance()->meassure_time(comp_aln, "\t\tCigar: ");
+
+ string md = this->get_md();
+ pos = this->getPosition();
+ int corr = 0;
+ bool match = false;
+ bool gap;
+ int ref_pos = 0;
+ size_t pos_events = 0;
+ int max_size = (this->getRefLength() * 0.9) + getPosition();
+ //comp_aln = clock();
+ for (size_t i = 0; i < md.size() && pos < max_size; i++) {
+ if (md[i] == '^') {
+ gap = true;
+ }
+ if ((atoi(&md[i]) == 0 && md[i] != '0')) { //is not a number
+ if (!gap) { // only mismatches are stored. We should have the rest from CIGAR
+ //correct for shift in position with respect to the ref:
+ while (ref_pos < events.size() && pos > events[ref_pos].position) {
+ if (events[ref_pos].type > 0) {
+ pos += events[ref_pos].type;
+ }
+ ref_pos++;
+ }
+ //store in sorted order:
+ add_event(pos, pos_events, events);
+
+ pos++; //just the pos on ref!
+ }
+ match = false;
+ } else if (!match) {
+ match = true;
+ pos += atoi(&md[i]);
+ gap = false;
+ }
+ }
+
+ if (flag) {
+ for (size_t i = 0; i < events.size(); i++) {
+ if (abs(events[i].type) > 3000) {
+ cout << events[i].position << " " << events[i].type << endl;
+ }
+ }
+ cout << endl;
+ }
+
+ //Parameter::Instance()->meassure_time(comp_aln, "\t\tMD string: ");
+// comp_aln = clock();
+ size_t i = 0;
+ //to erase stretches of consecutive mismatches == N in the ref
+ int break_point = 0;
+ while (i < events.size()) {
+ if (events[i].position > max_size) {
+ while (i < events.size()) {
+ if (abs(events[events.size() - 1].type) == Parameter::Instance()->huge_ins) {
+ i++;
+ } else {
+ events.erase(events.begin() + i, events.begin() + i + 1);
+ }
+ }
+ break;
+ }
+ if (events[i].type == 0) {
+ size_t j = 1;
+ while (i + j < events.size() && ((events[i + j].position - events[i + (j - 1)].position) == 1 && events[i + j].type == 0)) {
+ j++;
+ }
+ if (j > 10) { //if stetch is at least 3 consecutive mismatches
+ events.erase(events.begin() + i, events.begin() + i + j);
+ } else {
+ i += j;
+ }
+ } else {
+ i++;
+ }
+ }
+// Parameter::Instance()->meassure_time(comp_aln, "\t\terrase N: ");
+ if (flag) {
+ cout << "LAST:" << endl;
+ for (size_t i = 0; i < events.size(); i++) {
+ if (abs(events[i].type) > 3000) {
+ cout << events[i].position << " " << events[i].type << endl;
+ }
+ }
+ cout << endl;
}
- stop = this->getPosition() + this->getRefLength();
+
+ return events;
}
void Alignment::computeAlignment() {
+ cout << "COMP ALN!" << endl;
+
+ clock_t comp_aln = clock();
+ int to_del = 0;
int pos = 0;
+
for (size_t i = 0; i < al->CigarData.size(); i++) {
if (al->CigarData[i].Type == 'I') {
- for (uint32_t t = 0; t < al->CigarData[i].Length; t++) {
- alignment.second.insert(pos, "-");
- alignment.second.erase(alignment.second.size() - 1, 1);
- pos++;
- }
+ to_del += al->CigarData[i].Length;
+ alignment.second.insert(pos, al->CigarData[i].Length, '-');
+ pos += al->CigarData[i].Length;
} else if (al->CigarData[i].Type == 'D') {
- for (uint32_t t = 0; t < al->CigarData[i].Length; t++) {
- alignment.first.insert(pos, "-");
- pos++;
- }
- } else if (al->CigarData[i].Type == 'S') {
+ alignment.first.insert(pos, al->CigarData[i].Length, '-');
+ alignment.second.insert(pos, al->CigarData[i].Length, 'X');
+ pos += al->CigarData[i].Length;
+ /*for (uint32_t t = 0; t < al->CigarData[i].Length; t++) {
+ alignment.first.insert(pos, "-");
+ alignment.second.insert(pos, "X");
+ pos++;
+ }*/
+ } else if (al->CigarData[i].Type == 'S') {
if (pos == 0) { //front side
alignment.second.erase(((int) alignment.second.size()) - al->CigarData[i].Length, al->CigarData[i].Length);
} else { //backside
alignment.second.erase(pos, al->CigarData[i].Length);
}
alignment.first.erase(pos, al->CigarData[i].Length);
-
} else if (al->CigarData[i].Type == 'M') {
pos += al->CigarData[i].Length;
} else if (al->CigarData[i].Type == 'H') {
-
+ //nothing todo
} else if (al->CigarData[i].Type == 'N') {
alignment.second.erase(pos, al->CigarData[i].Length);
}
}
- for (size_t i = 0; i < alignment.first.size(); i++) {
- if (alignment.first[i] == '=') {
- alignment.first[i] = alignment.second[i];
- }
+ if (to_del > 0) {
+ alignment.second = alignment.second.substr(0, alignment.second.size() - to_del);
+ //alignment.second.erase(alignment.second.size() - to_del, to_del);
}
+ Parameter::Instance()->meassure_time(comp_aln, "\t\tCIGAR opterations ");
+ comp_aln = clock();
+ //Apply MD string:
+ string md = this->get_md();
+ pos = 0;
+ int corr = 0;
+ bool match = false;
+ int last_pos_string = 0;
+ int last_pos_ref = 0;
- is_computed = true;
+ for (size_t i = 0; i < md.size(); i++) {
+ if (atoi(&md[i]) == 0 && md[i] != '0') { //is not a number!
+ if (md[i] != '^') {
+ update_aln(alignment.second, last_pos_string, pos - last_pos_ref);
+ last_pos_ref = pos;
+ pos++;
+ }
+ match = false;
+ } else if (!match) {
+ match = true;
+ pos += atoi(&md[i]);
+ }
+ }
+ Parameter::Instance()->meassure_time(comp_aln, "\t\tMD opterations ");
- if (alignment.first.size() != alignment.second.size()) {
- cerr << "Error alignment has different length" << endl;
- cerr << " ignoring alignment " << al->Name << endl;
- cerr << al->Position << endl;
+ if (alignment.first.size() != alignment.second.size()) { // || strcmp(this->getName().c_str(),"IIIIII_10892000")==0) {
+ //if(al->CigarData[0].Length!=100){
+ cout << "Error alignment has different length" << endl;
+ cout << " ignoring alignment " << al->Name << endl;
+ cout << al->Position << endl;
- cerr << endl;
- cerr << "read: " << alignment.first << endl;
- cerr << endl;
- cerr << " ref: " << alignment.second << endl;
- cerr << endl;
- cerr << orig_length << endl;
+ cout << endl;
+ cout << "read: " << alignment.first << endl;
+ cout << " ref: " << alignment.second << endl;
+ cout << endl;
+ cout << orig_length << endl;
vector<CigarOp> cig = getCigar();
-
for (size_t i = 0; i < cig.size(); i++) {
- cerr << cig[i].Length << cig[i].Type << " ";
+ cout << cig[i].Length << cig[i].Type << " ";
}
- cerr << endl;
- exit(0);
- return;
+ cout << endl;
+
+ cout << this->get_md() << endl;
+
+ // exit(0);
+ // return;
}
}
int32_t Alignment::getPosition() {
@@ -104,14 +316,14 @@ size_t Alignment::get_length(std::vector<CigarOp> CigarData) {
size_t len = 0; //orig_length;
for (size_t i = 0; i < CigarData.size(); i++) {
if (CigarData[i].Type == 'D' || CigarData[i].Type == 'M' || CigarData[i].Type == 'N') {
-
len += CigarData[i].Length;
}
}
return len;
}
size_t Alignment::getRefLength() {
- return get_length(this->al->CigarData);
+ return this->ref_len;
+// return get_length(this->al->CigarData);
}
size_t Alignment::getOrigLen() {
return orig_length;
@@ -128,18 +340,19 @@ string Alignment::getName() {
uint16_t Alignment::getMappingQual() {
return al->MapQuality;
}
-float Alignment::getIdentity() {
- if (is_computed) {
- float match = 0;
- for (size_t i = 0; i < alignment.first.size(); i++) {
- if (alignment.first[i] == alignment.second[i]) {
- match++;
- }
- }
- return match / (float) alignment.first.size();
- }
- return -1;
-}
+
+/*float Alignment::getIdentity() {
+ if (is_computed) {
+ float match = 0;
+ for (size_t i = 0; i < alignment.first.size(); i++) {
+ if (alignment.first[i] == alignment.second[i]) {
+ match++;
+ }
+ }
+ return match / (float) alignment.first.size();
+ }
+ return -1;
+ }*/
int Alignment::getAlignmentFlag() {
return al->AlignmentFlag;
}
@@ -213,43 +426,120 @@ int Alignment::get_id(RefVector ref, std::string chr) {
}
return -1; //should not happen!
}
-void Alignment::get_coords(aln_str tmp, int & start, int &stop) {
- if (tmp.cigar[0].Type == 'S' || tmp.cigar[0].Type == 'H') {
- start = tmp.cigar[0].Length;
- } else {
- start = 0;
+int get_readlen(std::vector<CigarOp> cigar) {
+ int pos = 0;
+ for (size_t i = 0; i < cigar.size(); i++) {
+ if (cigar[i].Type == 'I') {
+ pos += cigar[i].Length;
+ } else if (cigar[i].Type == 'D') {
+ //pos += cigar[i].Length;
+ } else if (cigar[i].Type == 'M') {
+ pos += cigar[i].Length;
+ }
}
+ return pos;
+}
+void Alignment::get_coords(aln_str tmp, int & start, int &stop) {
- size_t index = tmp.cigar.size() - 1;
+ size_t index = 0;
+ if (!tmp.strand) {
+ index = tmp.cigar.size() - 1;
+ }
if (tmp.cigar[index].Type == 'S' || tmp.cigar[index].Type == 'H') {
- stop = tmp.cigar[index].Length;
+ start = tmp.cigar[index].Length;
} else {
- stop = 0;
+ start = 0;
}
+ stop = get_readlen(tmp.cigar) + start;
+}
- if (!tmp.strand) {
- int h = start;
- start = stop;
- stop = h;
+void Alignment::check_entries(vector<aln_str> &entries) {
+ bool flag = strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0;
+//Given that we start outside of the INV:
+ if (flag) {
+ cout << "Check:" << endl;
+ for (size_t i = 0; i < entries.size(); i++) {
+ cout << "ENT: " << entries[i].pos << " " << entries[i].pos + entries[i].length << " Read: " << entries[i].read_pos_start << " " << entries[i].read_pos_stop << " ";
+ if (entries[i].strand) {
+ cout << "+" << endl;
+ } else {
+ cout << "-" << endl;
+ }
+ }
}
+ bool left_of = true;
+ vector<aln_str> new_entries = entries;
+ for (size_t i = 1; i < entries.size(); i++) {
+ if (entries[i].strand != entries[i - 1].strand && entries[i].RefID == entries[i - 1].RefID) {
+ int ref_dist = 0;
+ int read_dist = 0;
+ if (entries[0].strand) {
+ ref_dist = abs((entries[i - 1].pos + entries[i - 1].length) - entries[i].pos);
+ read_dist = abs(entries[i - 1].read_pos_stop - entries[i].read_pos_start);
+ } else {
+ ref_dist = abs((entries[i - 1].pos) - (entries[i].pos + entries[i].length));
+ read_dist = abs(entries[i - 1].read_pos_stop - entries[i].read_pos_start);
- /*start = 0;
- stop = 0;
- for (size_t i = 0; i < cigar.size(); i++) {
- if (start == 0 && (cigar[i].Type == 'H' || cigar[i].Type == 'S')) {
- start += cigar[i].Length;
- stop += cigar[i].Length;
- }
- if (cigar[i].Type == 'I' || cigar[i].Type == 'M') {
- stop += cigar[i].Length;
- }
- }*/
+ }
+ if (flag) {
+ cout << "ref dist: " << ref_dist << " Read: " << read_dist << endl;
+ }
+ //ref_dist > 30 &&
+ if (read_dist < 20 && ref_dist / (read_dist + 1) > 3) { //+1 because otherwise there could be a division by 0!
+ if (flag) {
+ cout << "DEL? " << ref_dist << " " << read_dist << endl;
+ }
+ aln_str tmp;
+ tmp.RefID = entries[i].RefID;
+ if (left_of) {
+ tmp.strand = entries[i - 1].strand;
+ if (tmp.strand) {
+ tmp.pos = entries[i].pos - 1;
+ } else {
+ tmp.pos = entries[i].pos + entries[i].length - 1;
+ }
+ left_of = false;
+ } else {
+ tmp.strand = entries[i].strand;
+ if (tmp.strand) {
+ tmp.pos = entries[i - 1].pos + entries[i - 1].length - 1;
+ } else {
+ tmp.pos = entries[i - 1].pos - 1;
+ }
+ left_of = true;
+ }
+ tmp.length = 1;
+ tmp.read_pos_start = entries[i].read_pos_start - 1;
+ tmp.read_pos_stop = tmp.read_pos_start + 1;
+ tmp.mq = 60;
+ sort_insert(tmp, new_entries);
+ } else if (read_dist > 30 && ref_dist < 10) {
+ // cout << "INS? " << this->getName() << endl;
+ }
+ } else {
+ left_of = true; //??
+ }
+ }
+ if (entries.size() < new_entries.size()) {
+ entries = new_entries;
+ }
}
-void sort_insert(aln_str tmp, vector<aln_str> &entries) {
+void Alignment::sort_insert(aln_str tmp, vector<aln_str> &entries) {
+ //TODO detect abnormal distances + directions -> introduce a pseudo base to detect these things later?
for (vector<aln_str>::iterator i = entries.begin(); i != entries.end(); i++) {
- if (tmp.read_pos_start < (*i).read_pos_start) {
+ if ((tmp.read_pos_start == (*i).read_pos_start) && (tmp.pos == (*i).pos) && (tmp.strand == (*i).strand)) { //is the same! should not happen
+ return;
+ }
+ if (!tmp.cigar.empty() && ((*i).read_pos_start <= tmp.read_pos_start && (*i).read_pos_stop >= tmp.read_pos_stop)) { //check for the additional introducded entries
+ return;
+ }
+ if (!tmp.cigar.empty() && ((*i).read_pos_start >= tmp.read_pos_start && (*i).read_pos_stop <= tmp.read_pos_stop)) { //check for the additional introducded entries
+ (*i) = tmp;
+ return;
+ }
+ if ((tmp.read_pos_start < (*i).read_pos_start)) { //insert before
entries.insert(i, tmp);
return;
}
@@ -258,38 +548,35 @@ void sort_insert(aln_str tmp, vector<aln_str> &entries) {
}
vector<aln_str> Alignment::getSA(RefVector ref) {
string sa;
-
vector<aln_str> entries;
if (al->GetTag("SA", sa) && !sa.empty()) {
-
//store the main aln:
aln_str tmp;
tmp.RefID = this->getRefID();
tmp.cigar = this->getCigar();
- tmp.length = this->getRefLength();
+ tmp.length = (long) get_length(tmp.cigar);
tmp.mq = this->getMappingQual();
- tmp.pos = this->getPosition(); //+get_ref_lengths(tmp.RefID, ref);
+ tmp.pos = (long) this->getPosition(); //+get_ref_lengths(tmp.RefID, ref);
tmp.strand = getStrand();
bool flag = strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0;
get_coords(tmp, tmp.read_pos_start, tmp.read_pos_stop);
entries.push_back(tmp);
if (flag) {
- std::cout << "Main Read: " << tmp.read_pos_start << " REF: " << tmp.pos << " " << tmp.RefID << std::endl;
+ std::cout << "Main Read: read start:" << tmp.read_pos_start << " REF: " << tmp.pos << " RefID: " << tmp.RefID << std::endl;
}
-
- //parse the rest:
size_t i = 0;
int count = 0;
std::string cigar;
std::string chr;
+ bool nested = true;
while (i < sa.size()) {
if (count == 0 && sa[i] != ',') {
chr += sa[i];
}
if (count == 1 && sa[i - 1] == ',') {
- tmp.pos = atoi(&sa[i]);
+ tmp.pos = (long) atoi(&sa[i]);
}
if (count == 2 && sa[i - 1] == ',') {
tmp.strand = (bool) (sa[i] == '+');
@@ -309,24 +596,29 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
}
if (sa[i] == ';') {
if (tmp.mq > Parameter::Instance()->min_mq && entries.size() <= Parameter::Instance()->max_splits) {
- //check this!
+ //TODO: check this!
tmp.cigar = translate_cigar(cigar); //translates the cigar (string) to a type vector
get_coords(tmp, tmp.read_pos_start, tmp.read_pos_stop); //get the coords on the read.
- tmp.length = get_length(tmp.cigar); //gives the length on the reference.
+ tmp.length = (long) get_length(tmp.cigar); //gives the length on the reference.
tmp.RefID = get_id(ref, chr); //translates back the chr to the id of the chr;
//TODO: should we do something about the MD string?
if (flag) {
- std::cout << "Read: " << tmp.read_pos_start << " REF: " << tmp.pos << " " << tmp.RefID;
+ std::cout << "Read: " << tmp.read_pos_start << " " << tmp.read_pos_stop << " REF: " << tmp.pos << " " << tmp.RefID;
if (tmp.strand) {
std::cout << "+" << std::endl;
} else {
- std::cout << "+" << std::endl;
+ std::cout << "-" << std::endl;
}
}
//tmp.pos+=get_ref_lengths(tmp.RefID, ref);
//insert sorted:
includes_SV = true;
sort_insert(tmp, entries);
+ } else if (tmp.mq < Parameter::Instance()->min_mq) {
+ nested = false;
+ } else { //Ignore read due to too many splits
+ entries.clear();
+ return entries;
}
chr.clear();
cigar.clear();
@@ -336,24 +628,35 @@ vector<aln_str> Alignment::getSA(RefVector ref) {
}
i++;
}
+ if (nested && entries.size() > 2) {
+ check_entries(entries);
+ }
+ if (flag) {
+ for (size_t i = 0; i < entries.size(); i++) {
+ cout << "ENT: " << entries[i].pos << " " << entries[i].pos + entries[i].length << " Read: " << entries[i].read_pos_start << " " << entries[i].read_pos_stop << " ";
+ if (entries[i].strand) {
+ cout << "+" << endl;
+ } else {
+ cout << "-" << endl;
+ }
+ }
+ }
}
return entries;
}
//returns -1 if flags are not set!
double Alignment::get_scrore_ratio() {
- uint score = 0;
- uint subscore = 0;
+ uint score = -1;
+ uint subscore = -1;
if (al->GetTag("AS", score) && al->GetTag("XS", subscore)) {
- if(subscore==0){
- return 40;// -1;
- }
- if (strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0) {
- std::cout << getName()<<" score: "<<(double) score / (double) subscore << std::endl;
+ if (subscore == 0) {
+ subscore = 1;
}
+ //cout<<'\t'<<score<<" "<<subscore<<endl;
return (double) score / (double) subscore;
}
- return 100; //TODO: -1
+ return -1;
}
bool Alignment::get_is_save() {
string sa;
@@ -361,13 +664,13 @@ bool Alignment::get_is_save() {
double score = get_scrore_ratio();
/*if((al->GetTag("XA", sa) && !sa.empty())){
- std::cout<<this->getName()<<"XA"<<std::endl;
- }
- if( (al->GetTag("XT", sa) && !sa.empty()) ){
- std::cout<<this->getName()<<"XT"<<std::endl;
- }*/
+ std::cout<<this->getName()<<"XA"<<std::endl;
+ }
+ if( (al->GetTag("XT", sa) && !sa.empty()) ){
+ std::cout<<this->getName()<<"XT"<<std::endl;
+ }*/
- return !((al->GetTag("XA", sa) && !sa.empty()) || (al->GetTag("XT", sa) && !sa.empty()) || (score == -1 || score< Parameter::Instance()->score_treshold)); //TODO: 7.5
+ return !((al->GetTag("XA", sa) && !sa.empty()) || (al->GetTag("XT", sa) && !sa.empty())); //|| (score == -1 || score < Parameter::Instance()->score_treshold)); //TODO: 7.5
}
std::vector<CigarOp> Alignment::translate_cigar(std::string cigar) {
@@ -394,16 +697,16 @@ std::vector<CigarOp> Alignment::translate_cigar(std::string cigar) {
}
double Alignment::get_avg_indel_length_Cigar() {
- double len=0;
- double num=0;
+ double len = 0;
+ double num = 0;
for (size_t i = 0; i < al->CigarData.size(); i++) {
- if ((al->CigarData[i].Type == 'I'||al->CigarData[i].Type == 'D') && al->CigarData[i].Length > 1) {
- len+= al->CigarData[i].Length;
+ if ((al->CigarData[i].Type == 'I' || al->CigarData[i].Type == 'D') && al->CigarData[i].Length > 1) {
+ len += al->CigarData[i].Length;
num++;
}
}
- return len/num;
+ return len / num;
}
vector<str_event> Alignment::get_events_CIGAR() {
@@ -424,7 +727,7 @@ vector<str_event> Alignment::get_events_CIGAR() {
events.push_back(ev);
}
if (al->CigarData[i].Type == 'I' && al->CigarData[i].Length > Parameter::Instance()->min_cigar_event) {
- // std::cout<<"CIGAR: "<<al->CigarData[i].Length<<" "<<this->getName()<<std::endl;
+ // std::cout<<"CIGAR: "<<al->CigarData[i].Length<<" "<<this->getName()<<std::endl;
str_event ev;
ev.length = al->CigarData[i].Length * -1; //insertion;
ev.pos = pos;
@@ -489,119 +792,339 @@ std::string Alignment::get_md() {
}
vector<str_event> Alignment::get_events_MD(int min_mis) {
vector<str_event> events;
- std::string md;
- if (al->GetTag("MD", md)) {
- //TODO: remove:
- bool flag = strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0;
+ /*std::string md;
+ if (al->GetTag("MD", md)) {
+ //TODO: remove:
+ bool flag = strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0;
- if (flag) {
- std::cout << "found!" << std::endl;
+ if (flag) {
+ std::cout << "found!" << std::endl;
+ }
+ //TODO think of a good threshold!
+ if (get_num_mismatches(md) > Parameter::Instance()->min_num_mismatches) {
+ if (flag) {
+ std::cout << "is_strange!" << std::endl;
+ }
+ //generate a vector that holds the positions of the read
+ std::vector<int> aln;
+ int pos = getPosition();
+
+ for (size_t i = 0; i < al->CigarData.size(); i++) {
+ if (al->CigarData[i].Type == 'I') { //TODO check
+ }
+ if (al->CigarData[i].Type == 'D') {
+ pos += al->CigarData[i].Length;
+ }
+ if (al->CigarData[i].Type == 'M') {
+ for (size_t j = 0; j < al->CigarData[i].Length; j++) {
+ aln.push_back(pos);
+ pos++;
+ //aln += "=";
+ }
+ }
+ }
+ //fill in the mismatches:
+ bool deletion = false;
+ bool match = false;
+ double mis = 0;
+ double len = 0;
+ for (size_t i = 0; i < md.size(); i++) {
+ if ((atoi(&md[i]) == 0 && md[i] != '0')) { //is not a number:
+ if (md[i] == '^') {
+ deletion = true;
+ }
+ if (!deletion) {
+ //mistmatch!!
+ mis++;
+ aln[len] = aln[len] * -1;
+ len++;
+ }
+ match = false;
+ } else if (!match) {
+ len += atoi(&md[i]);
+ match = true;
+ deletion = false;
+ }
+ }
+
+
+ int runlength = 100;
+ str_event ev;
+ ev.pos = -1;
+ ev.length = -1;
+ ev.read_pos = 0;
+ int start = 0;
+ int last = 0;
+ for (size_t i = 0; i < aln.size(); i += 50) { //+=runlength/2 ??
+ //std::cout<<aln[i]<<";";
+ int mis = 0;
+ int first = 0;
+
+ for (size_t j = 0; (j + i) < aln.size() && j < runlength; j++) {
+ if (aln[(i + j)] < 0) {
+ if (first == 0) {
+ first = abs(aln[(i + j)]);
+ }
+ last = abs(aln[(i + j)]);
+ mis++;
+ }
+ }
+ if (mis > min_mis) { //TOOD ratio?
+ if (ev.pos == -1) {
+ start = i;
+ ev.pos = first;
+ ev.read_pos = ev.pos - getPosition();
+ }
+ } else {
+ if ((start > 20 && abs((int) (i + runlength) - (int) aln.size()) > 20) && ev.pos != -1) {
+ if (flag) {
+ std::cout << i << " " << (i + runlength) << " " << aln.size() << std::endl;
+ std::cout << ev.pos << " " << last << " " << std::endl;
+ }
+ includes_SV = true;
+ ev.length = last - ev.pos;
+ if (flag) {
+ std::cout << ev.pos << " " << ev.length << std::endl;
+ }
+ if (ev.length > runlength) {
+ events.push_back(ev);
+ }
+ last = 0;
+ ev.pos = -1;
+ } else {
+ ev.pos = -1;
+ }
+ }
+ }
+ }
+
+ }*/
+ return events;
+}
+
+vector<int> Alignment::get_avg_diff(double &avg_dist) {
+
+ //computeAlignment();
+ //cout<<alignment.first<<endl;
+ //cout<<alignment.second<<endl;
+
+ vector<differences_str> event_aln = summarizeAlignment();
+ vector<int> mis_per_window;
+ PlaneSweep_slim * plane = new PlaneSweep_slim();
+ int min_tresh = 5; //reflects a 10% error rate.
+ //compute the profile of differences:
+ avg_dist=0;
+ double avg_diff=0;
+ //std::cout<<this->getName()<<" "<<this->getPosition()<<std::endl;
+ //std::cout<<" READ: ";
+ for (size_t i = 0; i < event_aln.size(); i++) {
+ //std::cout<<event_aln[i].position<<"\t";
+ if(i>0){
+ avg_dist+=event_aln[i].position-(event_aln[i-1].position+abs(event_aln[i-1].type));
+ }
+ pair_str tmp;
+ tmp.position = -1;
+ if (event_aln[i].type == 0) {
+ tmp = plane->add_mut(event_aln[i].position, 1, min_tresh);
+ } else {
+ tmp = plane->add_mut(event_aln[i].position, abs(event_aln[i].type), min_tresh);
+ }
+ if (tmp.position != -1) { //check that its not the prev event!
+ mis_per_window.push_back(tmp.coverage); //store #mismatch per window each time it exceeds. (which might be every event position!)
+ }
+ }
+ //std::cout<<std::endl;
+ avg_dist=avg_dist/(double)event_aln.size();
+
+ plane->finalyze();
+ return mis_per_window; //total_num /num;
+}
+
+vector<str_event> Alignment::get_events_Aln() {
+
+ //clock_t comp_aln = clock();
+ vector<differences_str> event_aln = summarizeAlignment();
+ //double time2 = Parameter::Instance()->meassure_time(comp_aln, "\tcompAln Events: ");
+
+ vector<str_event> events;
+ PlaneSweep_slim * plane = new PlaneSweep_slim();
+ vector<pair_str> profile;
+// comp_aln = clock();
+ //Parameter::Instance()->read_name = "21_30705246";
+ bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
+ //cout<<" IDENT: "<<(double)event_aln.size()/(double)this->al->Length << " "<<this->getName().c_str()<<endl;
+ if (flag) {
+ cout<<"event: "<<event_aln.size()<<" "<<this->al->Length<<endl;
+ cout << "SCORE: " << get_scrore_ratio() << " IDENT: " << (double) event_aln.size() / (double) this->al->Length << endl;
+ }
+ int diffs = 0;
+ //compute the profile of differences:
+ for (size_t i = 0; i < event_aln.size(); i++) {
+ diffs += abs(event_aln[i].type);
+ pair_str tmp;
+ tmp.position = -1;
+ if (event_aln[i].type == 0) {
+ tmp = plane->add_mut(event_aln[i].position, 1, Parameter::Instance()->window_thresh);
+ } else {
+ tmp = plane->add_mut(event_aln[i].position, abs(event_aln[i].type), Parameter::Instance()->window_thresh);
}
- //TODO think of a good threshold!
- if (get_num_mismatches(md) > Parameter::Instance()->min_num_mismatches) {
+ if (tmp.position != -1 && (profile.empty() || (tmp.position - profile[profile.size() - 1].position) > 100)) {
+ profile.push_back(tmp);
if (flag) {
- std::cout << "is_strange!" << std::endl;
+ cout << "HIT: " << event_aln[i].position << " " << tmp.coverage << endl;
}
- //generate a vector that holds the positions of the read
- std::vector<int> aln;
- int pos = getPosition();
+ }
+ }
+ //Parameter::Instance()->meassure_time(comp_aln, "\tcompProfile: ");
+ /*if (strcmp(getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0) {
+ int prev = 0;
+ prev = getPosition();
+ for (size_t i = 0; i < event_aln.size(); i++) {
+ cout << event_aln[i].position - prev << " " << event_aln[i].type << endl;
- for (size_t i = 0; i < al->CigarData.size(); i++) {
- if (al->CigarData[i].Type == 'I') { //TODO check
- }
- if (al->CigarData[i].Type == 'D') {
- pos += al->CigarData[i].Length;
+ }
+ }*/
+
+ //comp_aln = clock();
+ size_t stop = 0;
+ size_t start = 0;
+ int tot_len = 0;
+ for (size_t i = 0; i < profile.size(); i++) {
+ if (profile[i].position > event_aln[stop].position) {
+
+ //find the postion:
+ size_t pos = 0;
+ while (pos < event_aln.size() && event_aln[pos].position != profile[i].position) {
+ pos++;
+ }
+
+ //run back to find the start:
+ start = pos;
+ int prev = event_aln[pos].position;
+ start = pos;
+ int prev_type = 1;
+ //todo it is actually pos + type and not *type
+ while (start > 0 && (prev - event_aln[start].position) < (Parameter::Instance()->avg_distance)) { //13 //} * abs(event_aln[start].type) + 1)) { //TODO I dont like 13!??
+ prev = event_aln[start].position;
+ prev_type = abs(event_aln[start].type);
+ start--;
+
+ if (prev_type == 0) {
+ prev_type = 1;
}
- if (al->CigarData[i].Type == 'M') {
- for (size_t j = 0; j < al->CigarData[i].Length; j++) {
- aln.push_back(pos);
- pos++;
- //aln += "=";
- }
+ prev += prev_type;
+ }
+ start++; //we are running one too far!
+
+ //run forward to identify the stop:
+ prev = event_aln[pos].position;
+ stop = pos;
+ prev_type = 1;
+
+ while (stop < event_aln.size() && (event_aln[stop].position - prev) < (Parameter::Instance()->avg_distance)) { // * abs(event_aln[stop].type) + 1)) {
+ prev = event_aln[stop].position;
+
+ prev_type = abs(event_aln[stop].type);
+ stop++;
+ if (prev_type == 0) {
+ prev_type = 1;
}
+ prev += prev_type;
}
- //fill in the mismatches:
- bool deletion = false;
- bool match = false;
- double mis = 0;
- double len = 0;
- //MD:Z:106G48^C33C41T0G5^G15G0C52^T15^C5^GC16G0A48^T3^C9^G3^G17^G23^G3^C2^G40C0G6
- for (size_t i = 0; i < md.size(); i++) {
- if ((atoi(&md[i]) == 0 && md[i] != '0')) { //is not a number:
- if (md[i] == '^') {
- deletion = true;
+ stop--;
+
+ int insert_max_pos = 0;
+ int insert_max = 0;
+ if (event_aln[start].type < 0) {
+ insert_max_pos = event_aln[start].position;
+ insert_max = abs(event_aln[start].type);
+ }
+
+ int del_max = 0;
+ int del_max_pos = 0;
+
+ double insert = 0;
+ double del = 0;
+ double mismatch = 0;
+
+ for (size_t k = start; k < stop; k++) {
+ if (flag) {
+ // cout << event_aln[k].position << " " << event_aln[k].type << endl;
+ }
+ if (event_aln[k].type == 0) {
+ mismatch++;
+ } else if (event_aln[k].type > 0) {
+ del += abs(event_aln[k].type);
+ if (del_max < abs(event_aln[k].type)) {
+ del_max = abs(event_aln[k].type);
+ del_max_pos = event_aln[k].position;
}
- if (!deletion) {
- //mistmatch!!
- mis++;
- aln[len] = aln[len] * -1;
- len++;
+ } else if (event_aln[k].type < 0) {
+ insert += abs(event_aln[k].type);
+ if (insert_max < abs(event_aln[k].type)) {
+ insert_max = abs(event_aln[k].type);
+ insert_max_pos = event_aln[k].position;
}
- match = false;
- } else if (!match) {
- len += atoi(&md[i]);
- match = true;
- deletion = false;
}
}
+ str_event tmp;
+ tmp.pos = event_aln[start].position;
- /* if (flag) {
- for (size_t i = 0; i < aln.size(); i++) {
- std::cout << aln[i] << " ";
- }
- std::cout << endl;
+ tmp.length = event_aln[stop].position;
+ if (event_aln[stop].type > 1) { //because of the way we summarize mutations to one location
+ tmp.length += event_aln[stop].type;
}
-*/
- int runlength = 100;
- str_event ev;
- ev.pos = -1;
- ev.length = -1;
- ev.read_pos = 0;
- int start = 0;
- int last = 0;
- for (size_t i = 0; i < aln.size(); i += 50) { //+=runlength/2 ??
- //std::cout<<aln[i]<<";";
- int mis = 0;
- int first = 0;
-
- for (size_t j = 0; (j + i) < aln.size() && j < runlength; j++) {
- if (aln[(i + j)] < 0) {
- if (first == 0) {
- first = abs(aln[(i + j)]);
- }
- last = abs(aln[(i + j)]);
- mis++;
- }
+ tmp.length = (tmp.length - event_aln[start].position);
+
+ tmp.type = 0;
+ //TODO constance used!
+ if (insert_max > Parameter::Instance()->min_length && insert > (del + del)) { //we have an insertion! //todo check || vs. &&
+ if (flag) {
+ cout << "store INS" << endl;
}
- if (mis > min_mis) { //TOOD ratio?
- if (ev.pos == -1) {
- start = i;
- ev.pos = first;
- ev.read_pos = ev.pos - getPosition();
- }
- } else {
- if ((start > 20 && abs((int) (i + runlength) - (int) aln.size()) > 20) && ev.pos != -1) {
- if (flag) {
- std::cout << i << " " << (i + runlength) << " " << aln.size() << std::endl;
- std::cout << ev.pos << " " << last << " " << std::endl;
- }
- includes_SV = true;
- ev.length = last - ev.pos;
- if (flag) {
- std::cout << ev.pos << " " << ev.length << std::endl;
- }
- if (ev.length > runlength) {
- events.push_back(ev);
- }
- last = 0;
- ev.pos = -1;
- } else {
- ev.pos = -1;
- }
+ tmp.length = insert_max; //TODO not sure!
+ tmp.pos = insert_max_pos;
+ tmp.type |= INS;
+ } else if (del_max > Parameter::Instance()->min_length && (mismatch < 2 && (insert + insert) < del)) { //deletion
+ if (flag) {
+ cout << "store DEL" << endl;
+ }
+ tmp.type |= DEL;
+ } else { //something:
+ if (flag) {
+ cout << "store Noise" << endl;
}
+ tmp.type |= DEL;
+ tmp.type |= INV;
+ }
+
+ if (flag) {
+ cout << "Read: " << (double) diffs << " " << (double) this->getRefLength() << " events: " << event_aln.size() << " " << this->al->Name << std::endl;
+ cout << "INS max " << insert_max << " del_max " << del_max << std::endl;
+ cout << "INS:" << insert << " DEL: " << del << " MIS: " << mismatch << endl;
+ cout << event_aln[start].position << " " << event_aln[stop].position << endl;
+ cout << tot_len << " " << this->getRefLength() << endl;
+ cout << "store: " << tmp.pos << " " << tmp.pos + abs(tmp.length) << " " << tmp.length << endl;
+ cout << endl;
}
- }
+ tot_len += tmp.length;
+
+ if (tot_len > this->getRefLength() * 0.77) { // if the read is just noisy as hell:
+ events.clear();
+ return events;
+ } else {
+ if (flag) {
+ cout << "STORE" << endl;
+ }
+ events.push_back(tmp);
+ }
+ }
+ }
+// Parameter::Instance()->meassure_time(comp_aln, "\tcompPosition: ");
+ if (events.size() > 4) { //TODO very arbitrary! test?
+ events.clear();
}
return events;
}
+
diff --git a/src/Alignment.d b/src/Alignment.d
new file mode 100644
index 0000000..05f069c
--- /dev/null
+++ b/src/Alignment.d
@@ -0,0 +1,54 @@
+src/Alignment.d: ../src/Alignment.cpp ../src/Alignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/Paramer.h
+
+../src/Alignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/Paramer.h:
diff --git a/src/Alignment.h b/src/Alignment.h
index 1894519..085202e 100644
--- a/src/Alignment.h
+++ b/src/Alignment.h
@@ -7,7 +7,7 @@
#ifndef ALIGNMENTS_H_
#define ALIGNMENTS_H_
-
+#include <ctime>
#include <string.h>
#include <vector>
#include <math.h>
@@ -15,25 +15,40 @@
#include "api/BamMultiReader.h"
#include "api/BamWriter.h"
#include "Paramer.h"
+#include "plane-sweep/PlaneSweep_slim.h"
+
+const unsigned char DEL = 0x01; // hex for 0000 0001
+const unsigned char DUP = 0x02; // hex for 0000 0010
+const unsigned char INS = 0x04; // hex for 0000 0100
+const unsigned char INV = 0x08; // hex for 0000 1000
+const unsigned char TRA = 0x10; // hex for 0001 0000
+
+
+
using namespace BamTools;
using namespace std;
-
typedef unsigned short ushort;
typedef unsigned int uint;
+struct differences_str{
+ int position;
+ short type;
+};
+
struct str_event{
short length;
int pos;
int read_pos;
+ char type;
};
struct aln_str{
int RefID;
- int pos;
+ long pos;
bool strand;
std::vector<CigarOp> cigar;
ushort mq;
ushort nm;
- ushort length;
+ long length;
int read_pos_start;
int read_pos_stop;
};
@@ -41,6 +56,7 @@ struct aln_str{
class Alignment {
private:
+ int ref_len;
BamAlignment * al;
bool includes_SV;
pair<string,string> alignment;
@@ -49,10 +65,13 @@ private:
int stop;
std::vector<CigarOp> translate_cigar(std::string cigar);
size_t get_length(std::vector<CigarOp> CigarData);
-
int get_id(RefVector ref, std::string chr);
+ vector<differences_str> summarizeAlignment();
+ void sort_insert(aln_str tmp, vector<aln_str> &entries);
+ void check_entries(vector<aln_str> &entries);
public:
Alignment(){
+ ref_len=0;
stop=0;
orig_length=0;
al=NULL;
@@ -79,7 +98,7 @@ public:
size_t getRefLength();
size_t getOrigLen();
BamAlignment * getAlignment();
- float getIdentity();
+ //float getIdentity();
void initAlignment();
int getAlignmentFlag();
string getQueryBases();
@@ -87,6 +106,7 @@ public:
string getTagData();
vector<aln_str> getSA(RefVector ref);
void initSequence();
+ vector<str_event> get_events_Aln();
int get_stop(){
return stop;
}
@@ -104,6 +124,8 @@ public:
double get_scrore_ratio();
std::string get_md();
double get_avg_indel_length_Cigar();
+ vector<int> get_avg_diff(double & avg_dist);
+
};
diff --git a/src/Alignment.o b/src/Alignment.o
new file mode 100644
index 0000000..c796031
Binary files /dev/null and b/src/Alignment.o differ
diff --git a/src/BamParser.cpp b/src/BamParser.cpp
index 83fb9b3..8ed431d 100644
--- a/src/BamParser.cpp
+++ b/src/BamParser.cpp
@@ -37,9 +37,10 @@ void BamParser::parseReadFast(uint16_t mappingQv,Alignment*& align){
// Alignment *align = new Alignment();
BamAlignment* al = align->getAlignment();
// getSequence().first
- align->initSequence();
-
+// align->initSequence();
+ align->getQueryBases().clear();
while(reader.GetNextAlignmentCore(al[0])){
+
if( al->IsMapped() && al->MapQuality > mappingQv){
al->BuildCharData();
align->setAlignment(al);
diff --git a/src/BamParser.d b/src/BamParser.d
new file mode 100644
index 0000000..f6c63d6
--- /dev/null
+++ b/src/BamParser.d
@@ -0,0 +1,58 @@
+src/BamParser.d: ../src/BamParser.cpp ../src/BamParser.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/Alignment.h ../src/Paramer.h ../src/Parser.h
+
+../src/BamParser.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/Alignment.h:
+
+../src/Paramer.h:
+
+../src/Parser.h:
diff --git a/src/BamParser.o b/src/BamParser.o
new file mode 100644
index 0000000..0721929
Binary files /dev/null and b/src/BamParser.o differ
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4a524c2..3f470b3 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -7,9 +7,10 @@ include_directories(../lib/tclap-1.2.1/include)
configure_file( Version.h.in ${CMAKE_SOURCE_DIR}/src/Version.h )
add_executable(sniffles
+ tree/Breakpoint_Tree.cpp
+ Genotyper/Genotyper.cpp
Alignment.cpp
BamParser.cpp
- Parser.cpp
Sniffles.cpp
Ignore_Regions.cpp
tree/Intervall_bed.cpp
@@ -19,12 +20,12 @@ add_executable(sniffles
realign/SWCPU.cpp
realign/Realign.cpp
print/VCFPrinter.cpp
- print/BedePrinter.cpp
- print/MariaPrinter.cpp
+ print/BedpePrinter.cpp
print/IPrinter.cpp
- phasing/PhaserSV.cpp
tree/BinTree.cpp
print/NGMPrinter.cpp
+ plane-sweep/PlaneSweep_slim.cpp
+ cluster/Cluster_SVs.cpp
)
#target_link_libraries(ngm-core pthread)
@@ -32,9 +33,10 @@ TARGET_LINK_LIBRARIES(sniffles BamTools-static)
TARGET_LINK_LIBRARIES(sniffles zlibstatic)
add_executable(sniffles-debug
+ tree/Breakpoint_Tree.cpp
+ Genotyper/Genotyper.cpp
Alignment.cpp
BamParser.cpp
- Parser.cpp
Sniffles.cpp
Ignore_Regions.cpp
tree/Intervall_bed.cpp
@@ -44,12 +46,12 @@ add_executable(sniffles-debug
realign/SWCPU.cpp
realign/Realign.cpp
print/VCFPrinter.cpp
- print/BedePrinter.cpp
- print/MariaPrinter.cpp
+ print/BedpePrinter.cpp
print/IPrinter.cpp
- phasing/PhaserSV.cpp
tree/BinTree.cpp
print/NGMPrinter.cpp
+ plane-sweep/PlaneSweep_slim.cpp
+ cluster/Cluster_SVs.cpp
)
SET_TARGET_PROPERTIES(sniffles-debug PROPERTIES COMPILE_FLAGS "-g3 -O0")
diff --git a/src/Genotyper/Genotyper.cpp b/src/Genotyper/Genotyper.cpp
new file mode 100644
index 0000000..90f7ba3
--- /dev/null
+++ b/src/Genotyper/Genotyper.cpp
@@ -0,0 +1,246 @@
+/*
+ * Genotyper.cpp
+ *
+ * Created on: Mar 28, 2016
+ * Author: fsedlaze
+ */
+
+#include "Genotyper.h"
+
+std::string Genotyper::mod_breakpoint_vcf(char *buffer, int ref) {
+ //find last of\t
+ //parse #reads supporting
+ //print #ref
+ string entry;
+ string tmp = string(buffer);
+ int pos = 0;
+ pos = tmp.find_last_of('\t');
+
+ entry = tmp.substr(0, pos);
+ entry += '\t';
+
+ tmp = tmp.substr(pos + 1);
+ pos = tmp.find_last_of(':');
+ int support = atoi(tmp.substr(pos + 1).c_str());
+ double allele = (double) support / (double) (support + ref);
+ if (allele > 0.8) {
+ entry += "1/1:";
+ } else if (allele > 0.3) {
+ entry += "0/1:";
+ } else {
+ entry += "0/0:";
+ }
+ std::stringstream ss;
+ ss << ref;
+ ss << ":";
+ ss << support;
+
+ entry += ss.str();
+ return entry;
+
+}
+
+std::string Genotyper::mod_breakpoint_bedpe(char *buffer, int ref) {
+
+ std::string tmp = string(buffer);
+ std::string entry = tmp;
+ entry += '\t';
+ //int ref = max(tree.get_ref(node,var.chr,var.pos),tree.get_ref(node,var.chr2,var.pos2));
+
+ int pos = tmp.find_last_of('\t'); //TODO!!
+ int support = atoi(tmp.substr(pos + 1).c_str());
+ std::stringstream ss;
+ ss << ref;
+ ss << "\t";
+ ss << support;
+ entry += ss.str();
+ return entry;
+}
+
+variant_str Genotyper::get_breakpoint_vcf(char *buffer) {
+ size_t i = 0;
+ int count = 0;
+
+ variant_str tmp;
+
+ while (buffer[i] != '\0' && buffer[i] != '\n') {
+ if (count == 0 && buffer[i] != '\t') {
+ tmp.chr += buffer[i];
+ }
+ if (count == 1 && buffer[i - 1] == '\t') {
+ tmp.pos = atoi(&buffer[i]);
+ }
+
+ if (count > 6 && strncmp(";CHR2=", &buffer[i], 6) == 0) {
+ i += 6;
+ while (buffer[i] != ';') {
+ tmp.chr2 += buffer[i];
+ i++;
+ }
+ }
+ if (count > 6 && strncmp(";END=", &buffer[i], 5) == 0) {
+ i += 5;
+ tmp.pos2 = atoi(&buffer[i]); //stores right most breakpoint
+ break;
+ }
+
+ if (buffer[i] == '\t') {
+ count++;
+ }
+ i++;
+ }
+ return tmp;
+}
+variant_str Genotyper::get_breakpoint_bedpe(char *buffer) {
+ size_t i = 0;
+ int count = 0;
+ std::string chr;
+ variant_str tmp;
+
+ while (buffer[i] != '\0' && buffer[i] != '\n') {
+ if (count == 12 && buffer[i] != '\t') {
+ tmp.chr += buffer[i];
+ }
+
+ if (count == 13 && buffer[i - 1] == '\t') {
+ tmp.pos = atoi(&buffer[i]);
+ }
+ if (count == 14 && buffer[i] != '\t') {
+ tmp.chr2 += buffer[i];
+ }
+ if (count == 15 && buffer[i - 1] == '\t') {
+ tmp.pos2 = atoi(&buffer[i]);
+ break;
+ }
+ if (buffer[i] == '\t') {
+ count++;
+ }
+ i++;
+ }
+ return tmp;
+}
+
+void Genotyper::update_file(Breakpoint_Tree & tree, breakpoint_node *& node) {
+ std::ifstream myfile;
+ bool is_vcf = !Parameter::Instance()->output_vcf.empty();
+
+ string file_name;
+ if (!Parameter::Instance()->output_vcf.empty()) {
+ file_name = Parameter::Instance()->output_vcf;
+ myfile.open(Parameter::Instance()->output_vcf.c_str(), std::ifstream::in);
+ } else if (!Parameter::Instance()->output_bedpe.empty()) {
+ file_name = Parameter::Instance()->output_bedpe;
+ myfile.open(Parameter::Instance()->output_bedpe.c_str(), std::ifstream::in);
+ }
+
+ FILE*file = fopen(Parameter::Instance()->tmp_file.c_str(), "w");
+
+ if (!myfile.good()) {
+ std::cout << "SVParse: could not open file: " << std::endl;
+ exit(0);
+ }
+ size_t buffer_size = 25000;
+ char* buffer = new char[buffer_size];
+ myfile.getline(buffer, buffer_size);
+ //parse SVs breakpoints in file
+ while (!myfile.eof()) {
+ if (buffer[0] != '#') {
+ std::string to_print;
+ // create binary tree to hold breakpoints!
+ variant_str tmp;
+ if (is_vcf) {
+ tmp = get_breakpoint_vcf(buffer);
+ } else {
+ tmp = get_breakpoint_bedpe(buffer);
+ }
+ int ref = max(tree.get_ref(node, tmp.chr, tmp.pos), tree.get_ref(node, tmp.chr2, tmp.pos2));
+ if (is_vcf) {
+ to_print = mod_breakpoint_vcf(buffer, ref);
+ } else {
+ to_print = mod_breakpoint_bedpe(buffer, ref);
+ }
+ fprintf(file, "%s", to_print.c_str());
+ } else {
+ fprintf(file, "%s", buffer);
+ }
+ fprintf(file, "%c", '\n');
+ myfile.getline(buffer, buffer_size);
+ }
+ myfile.close();
+ fclose(file);
+
+ string move = "mv ";
+ move += Parameter::Instance()->tmp_file;
+ move += " ";
+ move += file_name;
+ system(move.c_str());
+}
+
+void Genotyper::read_SVs(Breakpoint_Tree & tree, breakpoint_node *& node) {
+
+ std::ifstream myfile;
+ bool is_vcf = !Parameter::Instance()->output_vcf.empty();
+
+ if (!Parameter::Instance()->output_vcf.empty()) {
+ myfile.open(Parameter::Instance()->output_vcf.c_str(), std::ifstream::in);
+ } else if (!Parameter::Instance()->output_bedpe.empty()) {
+ myfile.open(Parameter::Instance()->output_bedpe.c_str(), std::ifstream::in);
+ }
+
+ if (!myfile.good()) {
+ std::cout << "SVParse: could not open file: " << std::endl;
+ exit(0);
+ }
+ size_t buffer_size = 25000;
+ char* buffer = new char[buffer_size];
+ myfile.getline(buffer, buffer_size);
+ //parse SVs breakpoints in file
+
+ while (!myfile.eof()) {
+ //cout << buffer << endl;
+ if (buffer[0] != '#') {
+ // create binary tree to hold breakpoints!
+
+ variant_str tmp;
+ if (is_vcf) {
+ tmp = get_breakpoint_vcf(buffer);
+ } else {
+ tmp = get_breakpoint_bedpe(buffer);
+ }
+ tree.insert(node, tmp.chr, tmp.pos);
+ tree.insert(node, tmp.chr2, tmp.pos2);
+ }
+ myfile.getline(buffer, buffer_size);
+ }
+ myfile.close();
+ //tree.inorder(node);
+}
+void Genotyper::compute_cov(Breakpoint_Tree & tree, breakpoint_node *& node) {
+ std::string name = Parameter::Instance()->tmp_file.c_str();
+ name += "ref_allele";
+ FILE * ref_allel_reads = fopen(name.c_str(), "r");
+ if (ref_allel_reads == NULL) {
+ std::cerr << "CovParse: could not open file: " << Parameter::Instance()->tmp_file.c_str() << std::endl;
+ }
+
+ //check if we want to compute the full coverage!
+ str_read tmp;
+ size_t nbytes = fread(&tmp, sizeof(struct str_read), 1, ref_allel_reads);
+ while (nbytes != 0) {
+ if (!tmp.SV_support) {
+ //if reads should be included-> Planesweep for +- breakpoint (Maybe hit -> extra function for that region around the breakpoint!
+ tree.overalps(tmp.start, tmp.start + tmp.length, tmp.chr, node, tmp.SV_support);
+ }
+ nbytes = fread(&tmp, sizeof(struct str_read), 1, ref_allel_reads);
+ }
+ fclose(ref_allel_reads);
+// tree.inorder(node);
+}
+
+void Genotyper::update_SVs() {
+ //parse SVs not just breakpoints and update with the coverage info
+ read_SVs(this->tree, this->node);
+ compute_cov(this->tree, this->node);
+ update_file(this->tree, this->node);
+}
+
diff --git a/src/Genotyper/Genotyper.h b/src/Genotyper/Genotyper.h
new file mode 100644
index 0000000..2aca323
--- /dev/null
+++ b/src/Genotyper/Genotyper.h
@@ -0,0 +1,40 @@
+/*
+ * Genotyper.h
+ *
+ * Created on: Mar 28, 2016
+ * Author: fsedlaze
+ */
+
+#ifndef GENOTYPER_H_
+#define GENOTYPER_H_
+#include "../Paramer.h"
+#include "../print/IPrinter.h"
+#include "../tree/Breakpoint_Tree.h"
+struct variant_str{
+ std::string chr;
+ std::string chr2;
+ int pos;
+ int pos2;
+};
+class Genotyper{
+private:
+ Breakpoint_Tree tree;
+ breakpoint_node * node;
+ void read_SVs(Breakpoint_Tree & tree,breakpoint_node *& node );
+ void compute_cov(Breakpoint_Tree & tree,breakpoint_node *& node);
+ void update_file(Breakpoint_Tree & tree,breakpoint_node *& node);
+ variant_str get_breakpoint_vcf(char *buffer);
+ variant_str get_breakpoint_bedpe(char *buffer);
+ std::string mod_breakpoint_vcf(char *buffer, int ref);
+ std::string mod_breakpoint_bedpe(char *buffer, int ref);
+
+public:
+ Genotyper(){
+ node=NULL;
+ }
+ ~Genotyper(){
+
+ }
+ void update_SVs();
+};
+#endif /* GENOTYPER_H_ */
diff --git a/src/Paramer.h b/src/Paramer.h
index 6c4e4f4..6607bbd 100644
--- a/src/Paramer.h
+++ b/src/Paramer.h
@@ -13,6 +13,7 @@
#include <vector>
#include <stdlib.h>
#include <iostream>
+#include <ctime>
struct region_str {
std::string chr;
int start;
@@ -22,8 +23,7 @@ struct region_str {
class Parameter {
private:
Parameter() {
- score_treshold = 0;
- min_num_mismatches = 0.3;
+ window_thresh=10;//TODO check!
}
~Parameter() {
@@ -32,11 +32,11 @@ private:
std::vector<region_str> regions;
public:
std::string output_vcf;
- std::string output_bede;
+ std::string output_bedpe;
std::string ref_seq;
std::string read_name;
- std::string output_maria;
std::string ignore_regions_bed;
+ std::string tmp_file;
std::vector<std::string> bam_files;
short min_mq;
@@ -46,17 +46,26 @@ public:
double error_rate;
double score_treshold;
- double min_num_mismatches;
+ double avg_distance;
+ //double min_num_mismatches;
+ int window_thresh;
int min_support;
int max_splits;
int max_dist;
int min_length;
int min_reads_phase;
int num_threads;
+ int max_readlength;
+ int min_grouping_support; //min num reads supporting the overlap of two SVs
+ int huge_ins;
bool realign;
+ bool splitthreader_output;
bool useMD_CIGAR;
+ bool debug;
+ bool genotype;
+ bool phase;
void set_regions(std::string reg) {
size_t i = 0;
@@ -88,6 +97,14 @@ public:
}
return m_pInstance;
}
+
+ double meassure_time(clock_t begin ,std::string msg){
+ clock_t end = clock();
+ double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
+ std::cout << msg<<" " << elapsed_secs<<std::endl;
+ return elapsed_secs;
+ }
+
};
#endif /* PARAMER_H_ */
diff --git a/src/Parser.cpp b/src/Parser.cpp
deleted file mode 100644
index 82c373b..0000000
--- a/src/Parser.cpp
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- * Parser.cpp
- *
- * Created on: May 29, 2012
- * Author: fritz
- */
-
-#include "Parser.h"
diff --git a/src/Parser.d b/src/Parser.d
new file mode 100644
index 0000000..cc1ff22
--- /dev/null
+++ b/src/Parser.d
@@ -0,0 +1,56 @@
+src/Parser.d: ../src/Parser.cpp ../src/Parser.h ../src/Alignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/Paramer.h
+
+../src/Parser.h:
+
+../src/Alignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/Paramer.h:
diff --git a/src/Parser.h b/src/Parser.h
index a15ff42..a07d55d 100644
--- a/src/Parser.h
+++ b/src/Parser.h
@@ -10,8 +10,14 @@
#include "Alignment.h"
-class Parser {
+struct str_read{
+ string chr;
+ uint start;
+ ushort length;
+ bool SV_support;
+};
+class Parser {
public:
virtual ~Parser(){};
virtual Alignment * parseRead(uint16_t mappingQv) = 0;
diff --git a/src/Parser.o b/src/Parser.o
new file mode 100644
index 0000000..011d63e
Binary files /dev/null and b/src/Parser.o differ
diff --git a/src/Sniffles.cpp b/src/Sniffles.cpp
index e14f25d..b8f80fb 100644
--- a/src/Sniffles.cpp
+++ b/src/Sniffles.cpp
@@ -11,65 +11,62 @@
#include "Paramer.h"
#include <tclap/CmdLine.h>
#include <omp.h>
+#include "Genotyper/Genotyper.h"
#include "realign/Realign.h"
#include "sub/Detect_Breakpoints.h"
#include "print/IPrinter.h"
-#include "print/BedePrinter.h"
#include "print/VCFPrinter.h"
-#include "print/MariaPrinter.h"
-#include "phasing/PhaserSV.h"
+#include "print/BedpePrinter.h"
#include "print/NGMPrinter.h"
#include "Ignore_Regions.h"
+#include "plane-sweep/PlaneSweep_slim.h"
+#include "print/BedpePrinter.h"
Parameter* Parameter::m_pInstance = NULL;
//cmake -D CMAKE_C_COMPILER=/opt/local/bin/gcc-mp-4.7 -D CMAKE_CXX_COMPILER=/opt/local/bin/g++-mp-4.7 ..
-//TODO: Friday: redo the positioning. Min,Max as representation, but collect the position and give the most likely!
-
-// TODO: Finish virtual Region class, Inherit Breakpoint and strange regions; Implement the region into the intervall tree.
-
+//TODO: for read names stored for each event store the number of possible events they support.-> If number==1 do not print them in tmp file.
+//TODO: write comparison script taking bed or a vcf file!
+//TODO: make score threshold only on events on reads and not on split reads!
// Think of method to filter out strange SV.
-// flushing the interval tree after a certain amount of SV/ bp? Would speed up things.
-// Allow for Illumina split read data?
-// Think again of computing the coverage?
-// VCF and bede output
-// Think of multiple bam files -> setting genotypes
// Think about overlapping SV, maybe flag to report if they share the same read -> phasing info?
// Regular scan through the SV and move those where the end point lies far behind the current pos or reads. Eg. 1MB?
-//TODO: look into the gene fusions.
void read_parameters(int argc, char *argv[]) {
- TCLAP::CmdLine cmd("Sniffles version 0.0.1", ' ', "0.0.1");
-
+ TCLAP::CmdLine cmd("Sniffles version 1.0.0", ' ', "1.0.0");
TCLAP::ValueArg<std::string> arg_bamfile("m", "mapped_reads", "Bam File", true, "", "string");
TCLAP::ValueArg<std::string> arg_vcf("v", "vcf", "VCF output file name", false, "", "string");
//TCLAP::ValueArg<std::string> arg_bede("b", "bede", "Bede output file name", false, "", "string");
- TCLAP::ValueArg<std::string> arg_maria("b", "bede", "Simplified format of bede Format.", false, "", "string");
+ TCLAP::ValueArg<std::string> arg_bedpe("b", "bedpe", " bedpe output file name", false, "", "string");
//TCLAP::ValueArg<std::string> arg_noregions("", "bed", "Ignore SV overlapping with those regions.", false, "", "string");
// TCLAP::ValueArg<std::string> arg_ref("r", "reference", "Reference fasta sequence. Activates realignment step", false, "", "string");
//TCLAP::ValueArg<std::string> arg_region("", "regions", "List of regions CHR:start-stop; to check", false, "", "string");
TCLAP::ValueArg<int> arg_support("s", "min_support", "Minimum number of reads that support a SV. Default: 10", false, 10, "int");
- TCLAP::ValueArg<int> arg_splits("", "max_num_splits", "Maximum number of splits per read to be still taken into account. Default: 4", false, 4, "int");
- TCLAP::ValueArg<int> arg_dist("d", "max_distance", "Maximum distance to group SV together. Default: 1kb", false, 1000, "int");
+ TCLAP::ValueArg<int> arg_splits("", "max_num_splits", "Maximum number of splits per read to be still taken into account. Default: 7", false, 7, "int");
+ TCLAP::ValueArg<int> arg_dist("d", "max_distance", "Maximum distance to group SV together. Default: 1kb", false, 500, "int");
TCLAP::ValueArg<int> arg_threads("t", "threads", "Number of threads to use. Default: 3", false, 3, "int");
//TCLAP::ValueArg<int> arg_corridor("", "corridor", "Maximum size of corridor for realignment. Default: 2000", false, 2000, "int");
- TCLAP::ValueArg<int> arg_minlength("l", "min_length", "Minimum length of SV to be reported. Default:0", false, 0, "int");
+ TCLAP::ValueArg<int> arg_minlength("l", "min_length", "Minimum length of SV to be reported. Default: 20", false, 20, "int");
TCLAP::ValueArg<int> arg_mq("q", "minmapping_qual", "Minimum Mapping Quality. Default: 20", false, 20, "int");
TCLAP::ValueArg<int> arg_cigar("c", "min_cigar_event", "Minimum Cigar Event (e.g. Insertion, deletion) to take into account. Default:50 ", false, 50, "int");
TCLAP::ValueArg<int> arg_numreads("n", "num_reads_report", "Report up to N reads that support the SV. Default: 0", false, 0, "int");
// TCLAP::ValueArg<int> arg_phase_minreads("", "min_reads_phase", "Minimum reads overlapping two SV to phase them together. Default: 1", false, 1, "int");
-
+ TCLAP::ValueArg<std::string> arg_tmp_file("", "tmp_file", "patht to temporary file otherwise Sniffles will use the current directory.", false, "", "string");
// TCLAP::SwitchArg arg_realign("", "re-align", "Enables the realignment of reads at predicted SV sites. Leads to more accurate breakpoint predictions.", cmd, false);
- TCLAP::SwitchArg arg_MD_cigar("", "use_MD_Cigar", "Enables Sniffles to use the alignment information to screen for suspicious regions.", cmd, false);
+// TCLAP::SwitchArg arg_MD_cigar("", "use_MD_Cigar", "Enables Sniffles to use the alignment information to screen for suspicious regions.", cmd, false);
+ //TCLAP::SwitchArg arg_Splitthreader("", "Splitthreader_output", "Enables Sniffles to compute also the full coverage required by Splitthreader.", cmd, false);
+ TCLAP::SwitchArg arg_genotype("", "genotype", "Enables Sniffles to compute the genotypes. (alpha version)", cmd, false);
+ TCLAP::SwitchArg arg_cluster("", "cluster", "Enables Sniffles to phase SVs that occure on the same reads (alpha version)", cmd, false);
cmd.add(arg_numreads);
+ cmd.add(arg_tmp_file);
cmd.add(arg_dist);
//cmd.add(arg_bede);
// cmd.add(arg_noregions);
cmd.add(arg_threads);
cmd.add(arg_cigar);
- cmd.add(arg_maria);
+ cmd.add(arg_bedpe);
cmd.add(arg_vcf);
// cmd.add(arg_ref);
//cmd.add(arg_corridor);
@@ -83,7 +80,9 @@ void read_parameters(int argc, char *argv[]) {
//parse cmd:
cmd.parse(argc, argv);
- Parameter::Instance()->read_name = " "; //m150118_093731_00118_c100767352550000001823169407221590_s1_p0/27613/0_15682" ;//TODO: just for debuging reasons!
+ Parameter::Instance()->debug = true;
+ Parameter::Instance()->score_treshold = 10;
+ Parameter::Instance()->read_name = " ";//00632dff-dc1a-4225-b50e-eaab5fd89cd0_Basecall_Alignment_template"; ;//just for debuging reasons!
Parameter::Instance()->bam_files.push_back(arg_bamfile.getValue());
Parameter::Instance()->min_mq = arg_mq.getValue();
Parameter::Instance()->output_vcf = arg_vcf.getValue();
@@ -93,15 +92,53 @@ void read_parameters(int argc, char *argv[]) {
Parameter::Instance()->max_splits = arg_splits.getValue();
Parameter::Instance()->max_dist = arg_dist.getValue();
//Parameter::Instance()->ref_seq = arg_ref.getValue();
+ Parameter::Instance()->splitthreader_output = true; //arg_Splitthreader.getValue();
Parameter::Instance()->realign = false; //arg_realign.getValue();
//Parameter::Instance()->corridor = arg_corridor.getValue();
// Parameter::Instance()->output_bede = arg_bede.getValue();
Parameter::Instance()->min_length = arg_minlength.getValue();
//Parameter::Instance()->min_reads_phase = arg_phase_minreads.getValue();
- Parameter::Instance()->useMD_CIGAR = arg_MD_cigar.getValue();
+// Parameter::Instance()->useMD_CIGAR = arg_MD_cigar.getValue();
+ Parameter::Instance()->genotype = arg_genotype.getValue();
+ Parameter::Instance()->phase = arg_cluster.getValue();
Parameter::Instance()->num_threads = arg_threads.getValue();
- Parameter::Instance()->output_maria = arg_maria.getValue();
+ Parameter::Instance()->output_bedpe = arg_bedpe.getValue();
//Parameter::Instance()->ignore_regions_bed = arg_noregions.getValue();
+ Parameter::Instance()->tmp_file = "test.tmp"; //arg_tmp_file.getValue();
+ Parameter::Instance()->min_grouping_support = 1;
+ Parameter::Instance()->huge_ins = 4000;//TODO check??
+
+ if (Parameter::Instance()->tmp_file.empty()) {
+ std::stringstream ss;
+ //ss<<"."; //TODO: User does not need to see this!
+ ss << rand();
+ ss << "_tmp";
+ Parameter::Instance()->tmp_file = ss.str();
+ }
+}
+
+void parse_binary() {
+ std::string tmp_name_file = Parameter::Instance()->tmp_file; // this file is created in IPrinter and stores the names and ID of SVS.
+ tmp_name_file += "Names";
+
+ FILE * alt_allel_reads = fopen(tmp_name_file.c_str(), "r");
+ if (alt_allel_reads == NULL) {
+ std::cerr << "ClusterParse: could not open tmp file: " << tmp_name_file.c_str() << std::endl;
+ }
+ std::cout << "start" << std::endl;
+ name_str tmp;
+ size_t nbytes = fread(&tmp, sizeof(struct name_str), 1, alt_allel_reads);
+ std::cout << tmp.read_name << std::endl;
+ while (nbytes != 0) {
+ int max_ID = std::max(max_ID, tmp.svs_id);
+
+ if (tmp.svs_id == 34 || tmp.svs_id == 35) {
+ std::cout << "Cluster: " << tmp.svs_id << " " << tmp.read_name << std::endl;
+ }
+ // std::cout << tmp.read_name << std::endl;
+ nbytes = fread(&tmp, sizeof(struct name_str), 1, alt_allel_reads);
+ }
+ fclose(alt_allel_reads);
}
int main(int argc, char *argv[]) {
@@ -120,11 +157,8 @@ int main(int argc, char *argv[]) {
printer = new NGMPrinter();
} else if (!Parameter::Instance()->output_vcf.empty()) {
printer = new VCFPrinter();
- std::cout << "VCF parser" << std::endl;
- } else if (!Parameter::Instance()->output_bede.empty()) {
- printer = new BedePrinter();
- } else if (!Parameter::Instance()->output_maria.empty()) {
- printer = new MariaPrinter();
+ } else if (!Parameter::Instance()->output_bedpe.empty()) {
+ printer = new BedpePrinter();
} else {
std::cerr << "Please specify an output file using -v or -b" << std::endl;
return -1;
@@ -132,26 +166,36 @@ int main(int argc, char *argv[]) {
printer->init();
- //TODO add support of multiple files!
- for (size_t i = 0; i < Parameter::Instance()->bam_files.size(); i++) {
- detect_breakpoints(Parameter::Instance()->bam_files[i], printer);
+ detect_breakpoints(Parameter::Instance()->bam_files[0], printer); //we could write out all read names for each sVs
+ printer->close_file();
+
+ //cluster the SVs together:
+ if (Parameter::Instance()->phase) {
+ std::cout << "Start phasing: " << std::endl;
+ Cluster_SVS *cluster = new Cluster_SVS();
+ cluster->update_SVs();
+ }
+
+ //determine genotypes:
+ if (Parameter::Instance()->genotype) {
+ std::cout << "Start genotype calling" << std::endl;
+ Genotyper * go = new Genotyper();
+ go->update_SVs();
}
//realignment: Using NGM???
if (!Parameter::Instance()->ref_seq.empty()) {
- std::cout<<"Realignment step activated:"<<std::endl;
+ std::cout << "Realignment step activated:" << std::endl;
//1. parse in output file(s)
//2. extract reads from bam files (read groups?) //shellscript: picard/samtools??
//3. realign
//4. call Sniffles
}
- //grouping SV together:
- //PhaserSV * phaser= new PhaserSV();
- //phaser->phase(final_SV);
- //delete phaser;
-
- //printing results:
+ cout << "Cleaning tmp files" << endl;
+ string del = "rm ";
+ del += Parameter::Instance()->tmp_file;
+ //system(del.c_str());
} catch (TCLAP::ArgException &e) // catch any exceptions
{
@@ -167,7 +211,6 @@ int main(int argc, char *argv[]) {
*
*
*/
-
/*
* 1. Detect strange regions
* Using MD, Cigar, Split reads
diff --git a/src/Sniffles.d b/src/Sniffles.d
new file mode 100644
index 0000000..c2d7b20
--- /dev/null
+++ b/src/Sniffles.d
@@ -0,0 +1,148 @@
+src/Sniffles.d: ../src/Sniffles.cpp ../src/Paramer.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/CmdLine.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/SwitchArg.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/Arg.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ArgException.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/Visitor.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/CmdLineInterface.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ArgTraits.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/StandardTraits.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/MultiSwitchArg.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/UnlabeledValueArg.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ValueArg.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/Constraint.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/OptionalUnlabeledTracker.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/UnlabeledMultiArg.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/MultiArg.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/XorHandler.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/HelpVisitor.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/CmdLineOutput.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/VersionVisitor.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/IgnoreRestVisitor.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/StdOutput.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ValuesConstraint.h \
+ ../src/sub/Detect_Breakpoints.h ../src/sub/../BamParser.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/Alignment.h ../src/Parser.h \
+ ../src/sub/../plane-sweep/Plane-sweep.h \
+ ../src/sub/../plane-sweep/IContainer.h \
+ ../src/sub/../plane-sweep/Node.h ../src/sub/../plane-sweep/MyList.h \
+ ../src/sub/../plane-sweep/MyHeap.h ../src/sub/../tree/IntervallTree.h \
+ ../src/sub/../tree/TNode.h ../src/sub/../tree/../sub/Breakpoint.h
+
+../src/Paramer.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/CmdLine.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/SwitchArg.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/Arg.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ArgException.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/Visitor.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/CmdLineInterface.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ArgTraits.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/StandardTraits.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/MultiSwitchArg.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/UnlabeledValueArg.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ValueArg.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/Constraint.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/OptionalUnlabeledTracker.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/UnlabeledMultiArg.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/MultiArg.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/XorHandler.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/HelpVisitor.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/CmdLineOutput.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/VersionVisitor.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/IgnoreRestVisitor.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/StdOutput.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include/tclap/ValuesConstraint.h:
+
+../src/sub/Detect_Breakpoints.h:
+
+../src/sub/../BamParser.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/Alignment.h:
+
+../src/Parser.h:
+
+../src/sub/../plane-sweep/Plane-sweep.h:
+
+../src/sub/../plane-sweep/IContainer.h:
+
+../src/sub/../plane-sweep/Node.h:
+
+../src/sub/../plane-sweep/MyList.h:
+
+../src/sub/../plane-sweep/MyHeap.h:
+
+../src/sub/../tree/IntervallTree.h:
+
+../src/sub/../tree/TNode.h:
+
+../src/sub/../tree/../sub/Breakpoint.h:
diff --git a/src/Sniffles.o b/src/Sniffles.o
new file mode 100644
index 0000000..44b4603
Binary files /dev/null and b/src/Sniffles.o differ
diff --git a/src/cluster/Cluster_SVs.cpp b/src/cluster/Cluster_SVs.cpp
new file mode 100644
index 0000000..45700f9
--- /dev/null
+++ b/src/cluster/Cluster_SVs.cpp
@@ -0,0 +1,149 @@
+/*
+ * Cluster_SVs.cpp
+ *
+ * Created on: Apr 28, 2016
+ * Author: fsedlaze
+ */
+
+#include "Cluster_SVs.h"
+
+std::map<long, std::vector<int> > Cluster_SVS::parse_names_ids(int & max_ID) {
+ std::string tmp_name_file = Parameter::Instance()->tmp_file; // this file is created in IPrinter and stores the names and ID of SVS.
+ tmp_name_file += "Names";
+
+ FILE * alt_allel_reads = fopen(tmp_name_file.c_str(), "r");
+ if (alt_allel_reads == NULL) {
+ std::cerr << "ClusterParse: could not open tmp file: " << tmp_name_file.c_str() << std::endl;
+ }
+
+ std::map<long, std::vector<int> > names;
+ name_str tmp;
+ size_t nbytes = fread(&tmp, sizeof(struct name_str), 1, alt_allel_reads);
+ while (nbytes != 0) {
+ max_ID = std::max(max_ID, tmp.svs_id);
+ //if(strcmp("22_19990256",tmp.read_name.c_str())==0){
+ // std::cout<<"Cluster: "<<tmp.svs_id<<std::endl;
+ //}
+ /*if (tmp.svs_id == 34 || tmp.svs_id == 35) {
+ std::cout << "Cluster: " << tmp.svs_id << " " << tmp.read_name << std::endl;
+ }*/
+
+ names[tmp.read_name].push_back(tmp.svs_id);
+ nbytes = fread(&tmp, sizeof(struct name_str), 1, alt_allel_reads);
+ }
+ fclose(alt_allel_reads);
+ return names;
+}
+
+void Cluster_SVS::update_SVs(std::vector<combine_str> & ids) {
+ std::ifstream myfile;
+ bool is_vcf = !Parameter::Instance()->output_vcf.empty();
+ std::string filename;
+ int col;
+ if (is_vcf) {
+ col = 2;
+ filename = Parameter::Instance()->output_vcf;
+ } else {
+ col = 6;
+ filename = Parameter::Instance()->output_bedpe;
+ }
+ myfile.open(filename.c_str(), std::ifstream::in);
+ if (!myfile.good()) {
+ std::cout << "Cluster Parse: could not open file: " << std::endl;
+ exit(0);
+ }
+
+ std::string tmp_name_file = filename;
+ tmp_name_file += ".tmp";
+ FILE*file = fopen(tmp_name_file.c_str(), "w");
+
+ size_t buffer_size = 250000;
+ char* buffer = new char[buffer_size];
+ myfile.getline(buffer, buffer_size);
+ //parse SVs breakpoints in file
+ while (!myfile.eof()) {
+ if (buffer[0] != '#') {
+ int count = 0;
+ for (size_t i = 0; i < buffer_size && (buffer[i] != '\0' && buffer[i] != '\n'); i++) {
+ if (count == col) { //if colum of id:
+ if (buffer[i - 1] == '\t') {
+ int id = atoi(&buffer[i]);
+ fprintf(file, "%i", find_id(id, ids));
+ fprintf(file, "%c", '\t');
+ }
+ } else {
+ fprintf(file, "%c", buffer[i]);
+ }
+ if (buffer[i] == '\t') {
+ count++;
+ }
+ }
+ } else {
+ fprintf(file, "%s", buffer);
+ }
+ fprintf(file, "%c", '\n');
+ myfile.getline(buffer, buffer_size);
+ }
+ myfile.close();
+ fclose(file);
+
+ std::string move = "mv ";
+ move += tmp_name_file;
+ move += " ";
+ move += filename;
+ system(move.c_str());
+}
+void Cluster_SVS::add_id(int curr_id, int new_id, std::vector<combine_str> & ids) {
+ for (size_t i = 0; i < ids.size(); i++) {
+ if (ids[i].curr_id == curr_id && ids[i].alt_id == new_id) {
+ ids[i].support++;
+ return;
+ }
+ }
+ combine_str tmp;
+ tmp.curr_id = curr_id;
+ tmp.alt_id = new_id;
+ tmp.support = 1;
+ ids.push_back(tmp);
+}
+int Cluster_SVS::find_id(int curr_id, std::vector<combine_str> & ids) {
+ for (size_t i = 0; i < ids.size(); i++) {
+ if (ids[i].support > Parameter::Instance()->min_grouping_support) {
+ if (ids[i].curr_id == curr_id) {
+ return ids[i].alt_id;
+ }
+ }
+ }
+ return curr_id;
+}
+void Cluster_SVS::update_SVs() {
+//1: read in names + IDs -> store in map!
+ int max_ID = 0;
+
+ //TODO: restructure!
+ //id=svs_id;
+
+ std::map<long, std::vector<int> > names = parse_names_ids(max_ID);
+
+//2: make array with ID as entry and value is the smalles ID in the colum of all storred readnames.
+ std::vector<combine_str> ids;
+ for (std::map<long, std::vector<int> >::iterator i = names.begin(); i != names.end(); i++) {
+ if ((*i).second.size() > 1) {
+ int min_id = max_ID + 1;
+ for (size_t j = 0; j < (*i).second.size(); j++) {
+ min_id = std::min(min_id, (*i).second[j]);
+ }
+ for (size_t j = 0; j < (*i).second.size(); j++) {
+ if ((*i).second[j] != min_id) {
+ add_id((*i).second[j], min_id, ids);
+ }
+ }
+ }
+ }
+ names.clear();
+/* for (size_t i = 0; i < ids.size(); i++) {
+ std::cout << ids[i].curr_id << " " << ids[i].alt_id << " " << ids[i].support << std::endl;
+ }*/
+//3: Update the IDS in the VCF/Bedpe files.
+ update_SVs(ids);
+}
diff --git a/src/cluster/Cluster_SVs.h b/src/cluster/Cluster_SVs.h
new file mode 100644
index 0000000..8c0fc58
--- /dev/null
+++ b/src/cluster/Cluster_SVs.h
@@ -0,0 +1,41 @@
+/*
+ * Cluster_SVs.h
+ *
+ * Created on: Apr 28, 2016
+ * Author: fsedlaze
+ */
+
+#ifndef CLUSTER_CLUSTER_SVS_H_
+#define CLUSTER_CLUSTER_SVS_H_
+#include <iostream>
+#include <fstream>
+#include <string.h>
+#include <vector>
+#include <map>
+#include "../Paramer.h"
+struct __attribute__((packed)) name_str{
+ long read_name;
+ int svs_id;
+};
+struct combine_str{
+ int curr_id;
+ int alt_id;
+ int support;
+};
+
+class Cluster_SVS{
+private:
+ std::map<long, std::vector<int> > parse_names_ids(int & max_ID) ;
+ void update_SVs( std::vector<combine_str> & ids); //just because the pass is more efficient
+ void add_id(int curr_id,int new_id, std::vector<combine_str> & ids);
+ int find_id(int curr_id, std::vector<combine_str> & ids);
+public:
+ Cluster_SVS(){
+ }
+ ~Cluster_SVS(){
+ }
+ void update_SVs();
+};
+
+
+#endif /* CLUSTER_CLUSTER_SVS_H_ */
diff --git a/src/phasing/PhaserSV.cpp b/src/phasing/PhaserSV.cpp
deleted file mode 100644
index 0e61541..0000000
--- a/src/phasing/PhaserSV.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * PhaserSV.cpp
- *
- * Created on: Sep 2, 2015
- * Author: fsedlaze
- */
-
-#include "PhaserSV.h"
-
-void PhaserSV::phase(std::vector<Breakpoint *> &svs) {
- //run all against all to check read names.. Is there an easier method?
-
-}
diff --git a/src/phasing/PhaserSV.h b/src/phasing/PhaserSV.h
deleted file mode 100644
index 8acafcc..0000000
--- a/src/phasing/PhaserSV.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * PhaserSV.h
- *
- * Created on: Sep 2, 2015
- * Author: fsedlaze
- */
-
-#include "api/BamReader.h"
-#include "../sub/Breakpoint.h"
-#include "../tree/BinTree.h"
-
-class PhaserSV{
-private:
-
-public:
- PhaserSV(){
-
- }
- ~PhaserSV(){
-
- }
- void phase(std::vector<Breakpoint *> &svs);
-
-};
diff --git a/src/plane-sweep/Main.d b/src/plane-sweep/Main.d
new file mode 100644
index 0000000..ab8853b
--- /dev/null
+++ b/src/plane-sweep/Main.d
@@ -0,0 +1 @@
+src/plane-sweep/Main.d: ../src/plane-sweep/Main.cpp
diff --git a/src/plane-sweep/Main.o b/src/plane-sweep/Main.o
new file mode 100644
index 0000000..65ded99
Binary files /dev/null and b/src/plane-sweep/Main.o differ
diff --git a/src/plane-sweep/MyHeap.d b/src/plane-sweep/MyHeap.d
new file mode 100644
index 0000000..5cbba9f
--- /dev/null
+++ b/src/plane-sweep/MyHeap.d
@@ -0,0 +1,62 @@
+src/plane-sweep/MyHeap.d: ../src/plane-sweep/MyHeap.cpp \
+ ../src/plane-sweep/MyHeap.h ../src/plane-sweep/IContainer.h \
+ ../src/plane-sweep/Node.h ../src/plane-sweep/../Alignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/plane-sweep/../Paramer.h
+
+../src/plane-sweep/MyHeap.h:
+
+../src/plane-sweep/IContainer.h:
+
+../src/plane-sweep/Node.h:
+
+../src/plane-sweep/../Alignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/plane-sweep/../Paramer.h:
diff --git a/src/plane-sweep/MyHeap.o b/src/plane-sweep/MyHeap.o
new file mode 100644
index 0000000..01bff2a
Binary files /dev/null and b/src/plane-sweep/MyHeap.o differ
diff --git a/src/plane-sweep/MyList.d b/src/plane-sweep/MyList.d
new file mode 100644
index 0000000..e4df628
--- /dev/null
+++ b/src/plane-sweep/MyList.d
@@ -0,0 +1,62 @@
+src/plane-sweep/MyList.d: ../src/plane-sweep/MyList.cpp \
+ ../src/plane-sweep/MyList.h ../src/plane-sweep/Node.h \
+ ../src/plane-sweep/../Alignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/plane-sweep/../Paramer.h ../src/plane-sweep/IContainer.h
+
+../src/plane-sweep/MyList.h:
+
+../src/plane-sweep/Node.h:
+
+../src/plane-sweep/../Alignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/plane-sweep/../Paramer.h:
+
+../src/plane-sweep/IContainer.h:
diff --git a/src/plane-sweep/MyList.o b/src/plane-sweep/MyList.o
new file mode 100644
index 0000000..250cda9
Binary files /dev/null and b/src/plane-sweep/MyList.o differ
diff --git a/src/plane-sweep/PlaneSweep.d b/src/plane-sweep/PlaneSweep.d
new file mode 100644
index 0000000..4dc4f32
--- /dev/null
+++ b/src/plane-sweep/PlaneSweep.d
@@ -0,0 +1,67 @@
+src/plane-sweep/PlaneSweep.d: ../src/plane-sweep/PlaneSweep.cpp \
+ ../src/plane-sweep/Plane-sweep.h ../src/plane-sweep/IContainer.h \
+ ../src/plane-sweep/Node.h ../src/plane-sweep/../Alignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/plane-sweep/../Paramer.h ../src/plane-sweep/MyList.h \
+ ../src/plane-sweep/MyHeap.h
+
+../src/plane-sweep/Plane-sweep.h:
+
+../src/plane-sweep/IContainer.h:
+
+../src/plane-sweep/Node.h:
+
+../src/plane-sweep/../Alignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/plane-sweep/../Paramer.h:
+
+../src/plane-sweep/MyList.h:
+
+../src/plane-sweep/MyHeap.h:
diff --git a/src/plane-sweep/PlaneSweep.o b/src/plane-sweep/PlaneSweep.o
new file mode 100644
index 0000000..3214bfb
Binary files /dev/null and b/src/plane-sweep/PlaneSweep.o differ
diff --git a/src/plane-sweep/PlaneSweep_slim.cpp b/src/plane-sweep/PlaneSweep_slim.cpp
new file mode 100644
index 0000000..317b7c1
--- /dev/null
+++ b/src/plane-sweep/PlaneSweep_slim.cpp
@@ -0,0 +1,43 @@
+/*
+ * PlaneSweep_slim.cpp
+ *
+ * Created on: Mar 9, 2016
+ * Author: fsedlaze
+ */
+
+#include "PlaneSweep_slim.h"
+
+pair_str PlaneSweep_slim::add_mut(int pos,int new_cov, int min_cov) {
+ //check if we need to release reads:
+ std::vector<pair_str>::iterator j = entries.begin();
+ for (size_t i = 0; i < this->entries.size() && pos > entries[i].position; i++) {
+ //no need to record ending events. We just search for starting events!
+ this->cov-=entries[i].coverage;
+ j++;
+ }
+
+ //erase old events:
+ entries.erase(entries.begin(), j);
+
+
+ //add current mut:
+ //insert the stop coordinate!
+ pair_str tmp;
+ tmp.position=pos+boundary;
+ tmp.coverage=new_cov;
+ this->entries.push_back(tmp);
+ this->cov+=new_cov;
+
+ //record if we met the threshold:
+
+ tmp.position=-1; //flag for not meeting the threshold
+ if(this->cov>min_cov){
+ tmp.coverage = this->cov;
+ tmp.position = pos;
+ }
+ return tmp;
+}
+void PlaneSweep_slim::finalyze() {
+ entries.clear();
+ this->cov=0;
+}
diff --git a/src/plane-sweep/PlaneSweep_slim.h b/src/plane-sweep/PlaneSweep_slim.h
new file mode 100644
index 0000000..d26bd90
--- /dev/null
+++ b/src/plane-sweep/PlaneSweep_slim.h
@@ -0,0 +1,40 @@
+/*
+ * PlaneSweep_slim.h
+ *
+ * Created on: Mar 9, 2016
+ * Author: fsedlaze
+ */
+
+#ifndef PLANE_SWEEP_PLANESWEEP_SLIM_H_
+#define PLANE_SWEEP_PLANESWEEP_SLIM_H_
+#include <vector>
+#include <iostream>
+#include <string.h>
+#include <list>
+#include "../Paramer.h" //for testing/debug
+struct pair_str{
+ int position;
+ int coverage;
+};
+
+
+class PlaneSweep_slim {
+private:
+ int boundary;
+ int cov;
+ std::vector<pair_str> entries;
+public:
+ PlaneSweep_slim() {
+ boundary=100;
+ cov=0;
+ }
+ ~PlaneSweep_slim(){
+ entries.clear();
+ }
+ void release_pos(int new_start);
+ pair_str add_mut(int pos,int cov, int min_cov);
+ void finalyze();
+
+};
+
+#endif /* PLANE_SWEEP_PLANESWEEP_SLIM_H_ */
diff --git a/src/plane-sweep/subdir.mk b/src/plane-sweep/subdir.mk
new file mode 100644
index 0000000..09def0d
--- /dev/null
+++ b/src/plane-sweep/subdir.mk
@@ -0,0 +1,33 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables
+CPP_SRCS += \
+../src/plane-sweep/Main.cpp \
+../src/plane-sweep/MyHeap.cpp \
+../src/plane-sweep/MyList.cpp \
+../src/plane-sweep/PlaneSweep.cpp
+
+OBJS += \
+./src/plane-sweep/Main.o \
+./src/plane-sweep/MyHeap.o \
+./src/plane-sweep/MyList.o \
+./src/plane-sweep/PlaneSweep.o
+
+CPP_DEPS += \
+./src/plane-sweep/Main.d \
+./src/plane-sweep/MyHeap.d \
+./src/plane-sweep/MyList.d \
+./src/plane-sweep/PlaneSweep.d
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/plane-sweep/%.o: ../src/plane-sweep/%.cpp
+ @echo 'Building file: $<'
+ @echo 'Invoking: Cross G++ Compiler'
+ g++ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include -O3 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
+
diff --git a/src/print/BedePrinter.cpp b/src/print/BedePrinter.cpp
deleted file mode 100644
index 04c6e67..0000000
--- a/src/print/BedePrinter.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * BedePrinter.cpp
- *
- * Created on: Aug 24, 2015
- * Author: fsedlaze
- */
-
-#include "BedePrinter.h"
-
-void BedePrinter::print_header() {
-//nothing to be done!
-}
-void BedePrinter::print_body(Breakpoint * &SV, RefVector ref) {
- //chr1 934247 934273 chr1 934690 934692 1 1.36749e-107 + - TYPE:DELETION IDS:2,36 STRANDS:+-,36 MAX:chr1:934248;chr1:934692 95:chr1:934248-934252;chr1:934692-934692
-
- if (!this->bed_tree.is_in(SV->get_coordinates().start.most_support, this->root) && !this->bed_tree.is_in(SV->get_coordinates().stop.most_support, this->root)) {
- std::string chr;
- int pos = IPrinter::calc_pos(SV->get_coordinates().start.max_pos, ref, chr);
- fprintf(file, "%s", chr.c_str());
- fprintf(file, "%c", '\t');
- fprintf(file, "%i", pos);
- fprintf(file, "%c", '\t');
- fprintf(file, "%i", pos + 1);
- fprintf(file, "%c", '\t');
- pos = IPrinter::calc_pos(SV->get_coordinates().stop.max_pos, ref, chr);
- fprintf(file, "%s", chr.c_str());
- fprintf(file, "%c", '\t');
- fprintf(file, "%i", pos);
- fprintf(file, "%c", '\t');
- fprintf(file, "%i", pos + 1);
- fprintf(file, "%c", '\t');
- fprintf(file, "%i", id);
- id++;
- fprintf(file, "%c", '\t');
- fprintf(file, "%c", '0'); //TODO: think about eval!
- fprintf(file, "%c", '\t');
- fprintf(file, "%s", SV->get_strand(1).c_str());
- fprintf(file, "%c", '\t');
- fprintf(file, "%s", IPrinter::get_type(SV->get_SVtype()).c_str()); //TODO
- fprintf(file, "%c", '\t');
- fprintf(file, "%s", "IDS:??");
- fprintf(file, "%c", '\t');
- fprintf(file, "%s", "STRANDS:??,666");
- fprintf(file, "%c", '\t');
- fprintf(file, "%s", "MAX:");
- pos = IPrinter::calc_pos(SV->get_coordinates().start.most_support, ref, chr);
- fprintf(file, "%s", chr.c_str());
- fprintf(file, "%c", ':');
- fprintf(file, "%i", pos);
- fprintf(file, "%c", ',');
- pos = IPrinter::calc_pos(SV->get_coordinates().stop.most_support, ref, chr);
- fprintf(file, "%s", chr.c_str());
- fprintf(file, "%c", ':');
- fprintf(file, "%i", pos);
- fprintf(file, "%c", '\t');
- fprintf(file, "%i",SV->get_length());
- fprintf(file, "%c", '\n');
- }
-}
diff --git a/src/print/MariaPrinter.cpp b/src/print/BedpePrinter.cpp
similarity index 73%
rename from src/print/MariaPrinter.cpp
rename to src/print/BedpePrinter.cpp
index b5cc161..746a928 100644
--- a/src/print/MariaPrinter.cpp
+++ b/src/print/BedpePrinter.cpp
@@ -1,18 +1,22 @@
/*
- * MariaPrinter.cpp
+ * BedePrinter.cpp
*
- * Created on: Sep 4, 2015
+ * Created on: Aug 24, 2015
* Author: fsedlaze
*/
-#include "MariaPrinter.h"
+#include "BedpePrinter.h"
-void MariaPrinter::print_header() {
- fprintf(file, "%s", "Chrom\tstart\tstop\tchrom2\tstart2\tstop2\tvariant_name/ID\tscore (smaller is better)\tstrand1\tstrand2\ttype\tnumber_of_split_reads\tbest_chrom\tbest_pos\tbest_chrom2\tbest_pos2\tpredicted_length\n");
+void BedpePrinter::print_header() {
+ fprintf(file, "%s", "#Chrom\tstart\tstop\tchrom2\tstart2\tstop2\tvariant_name/ID\tscore (smaller is better)\tstrand1\tstrand2\ttype\tnumber_of_split_reads\tbest_chrom\tbest_pos\tbest_chrom2\tbest_pos2\tpredicted_length\n");
}
-void MariaPrinter::print_body(Breakpoint *& SV, RefVector ref) {
- //"Chrom\tstart\tstop\tchrom2\tstart2\tstop2\tvariant_name/ID\tscore (smaller is better)\tstrand1\tstrand2\ttype\tnumber_of_split_reads\n"
+void BedpePrinter::print_body(Breakpoint * &SV, RefVector ref) {
if (!this->bed_tree.is_in(SV->get_coordinates().start.most_support, this->root) && !this->bed_tree.is_in(SV->get_coordinates().stop.most_support, this->root)) {
+ //temp. store read names supporting this SVs to later group the SVs together.
+ if (Parameter::Instance()->phase) {
+ store_readnames(SV->get_read_ids(), id);
+ }
+
std::string chr;
std::string strands = SV->get_strand(2);
int pos = IPrinter::calc_pos(SV->get_coordinates().start.min_pos, ref, chr);
@@ -53,6 +57,8 @@ void MariaPrinter::print_body(Breakpoint *& SV, RefVector ref) {
fprintf(file, "%i", pos);
fprintf(file, "%c", '\t');
fprintf(file, "%i", SV->get_length());
+ fprintf(file, "%c", '\t');
+ fprintf(file, "%i", SV->get_support());
fprintf(file, "%c", '\n');
}
}
diff --git a/src/print/BedePrinter.h b/src/print/BedpePrinter.h
similarity index 55%
rename from src/print/BedePrinter.h
rename to src/print/BedpePrinter.h
index 5b5b47a..a91a6c7 100644
--- a/src/print/BedePrinter.h
+++ b/src/print/BedpePrinter.h
@@ -5,22 +5,22 @@
* Author: fsedlaze
*/
-#ifndef PRINT_BEDEPRINTER_H_
-#define PRINT_BEDEPRINTER_H_
+#ifndef PRINT_BEDPEPRINTER_H_
+#define PRINT_BEDPEPRINTER_H_
#include "IPrinter.h"
-class BedePrinter:public IPrinter{
+class BedpePrinter:public IPrinter{
private:
void print_header();
void print_body(Breakpoint *& SV, RefVector ref);
public:
- BedePrinter(){
+ BedpePrinter(){
}
- ~BedePrinter(){
+ ~BedpePrinter(){
}
};
-#endif /* PRINT_BEDEPRINTER_H_ */
+#endif /* PRINT_BEDPEPRINTER_H_ */
diff --git a/src/print/IPrinter.cpp b/src/print/IPrinter.cpp
index 1bb9f4a..1f1a9a5 100644
--- a/src/print/IPrinter.cpp
+++ b/src/print/IPrinter.cpp
@@ -19,6 +19,15 @@ std::string IPrinter::get_chr(long pos, RefVector ref) {
return ref[id - 1].RefName;
}
+void IPrinter::store_readnames(std::vector<int> names, int id) {
+ name_str tmp;
+ tmp.svs_id = id; //stays the same
+ for (size_t i = 0; i < names.size(); i++) {
+ tmp.read_name = names[i];
+ fwrite(&tmp, sizeof(struct name_str), 1, this->tmp_file);
+ }
+}
+
long IPrinter::calc_pos(long pos, RefVector ref, std::string &chr) {
size_t i = 0;
pos -= (ref[i].RefLength + Parameter::Instance()->max_dist);
@@ -32,20 +41,45 @@ long IPrinter::calc_pos(long pos, RefVector ref, std::string &chr) {
}
std::string IPrinter::get_type(char type) {
+ string tmp;
if (type & DEL) {
- return "DEL";
+ tmp += "DEL";
}
if (type & INV) {
- return "INV";
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "INV";
}
if (type & DUP) {
- return "DUP";
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "DUP";
}
if (type & INS) {
- return "INS";
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "INS";
}
if (type & TRA) {
- return "TRA";
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "TRA";
}
- return "WTF"; // should not occur!
+
+ return tmp; // should not occur!
+}
+// Get current date/time, format is YYYY-MM-DD.HH:mm:ss
+const std::string IPrinter::currentDateTime() {
+ time_t now = time(0);
+ struct tm tstruct;
+ char buf[80];
+ tstruct = *localtime(&now);
+ // Visit http://en.cppreference.com/w/cpp/chrono/c/strftime
+ // for more information about date/time format
+ strftime(buf, sizeof(buf), "%Y%m%d", &tstruct);
+ return buf;
}
diff --git a/src/print/IPrinter.h b/src/print/IPrinter.h
index be10a18..351260e 100644
--- a/src/print/IPrinter.h
+++ b/src/print/IPrinter.h
@@ -9,15 +9,17 @@
#define PRINT_IPRINTER_H_
#include <vector>
#include <iostream>
-
+#include <time.h>
#include "../tree/Intervall_bed.h"
#include "api/BamReader.h"
#include "../Ignore_Regions.h"
#include "../sub/Breakpoint.h"
+#include "../cluster/Cluster_SVs.h"
class IPrinter {
protected:
FILE *file;
+ FILE *tmp_file;
uint id;
RefVector ref;
BamParser *mapped_file;
@@ -38,17 +40,21 @@ public:
}
virtual ~IPrinter() {
delete mapped_file;
- fclose(this->file);
+
}
- ;
+
void printSV(Breakpoint * SV) {
print_body(SV, ref);
}
void init() {
- if(!Parameter::Instance()->output_vcf.empty()){
- file = fopen(Parameter::Instance()->output_vcf.c_str(), "w");
- }else if(!Parameter::Instance()->output_maria.empty()){
- file = fopen(Parameter::Instance()->output_maria.c_str(), "w");
+ try {
+ if(!Parameter::Instance()->output_vcf.empty()) {
+ file = fopen(Parameter::Instance()->output_vcf.c_str(), "w");
+ } else if(!Parameter::Instance()->output_bedpe.empty()) {
+ file = fopen(Parameter::Instance()->output_bedpe.c_str(), "w");
+ }
+ } catch(int e) {
+ std::cout << "Unable to open file connection. Please check the path and the permissions! Error: " << e <<std::endl;
}
print_header();
BamParser *mapped_file = new BamParser(Parameter::Instance()->bam_files[0]);
@@ -58,8 +64,15 @@ public:
std::cout << "Cross checking..." << std::endl;
initialize_bed(bed_tree, root, ref);
}
-
+ string tmp_name_file = Parameter::Instance()->tmp_file;
+ tmp_name_file += "Names";
+ tmp_file = fopen(tmp_name_file.c_str(), "wb");
+ }
+ void store_readnames(std::vector<int> names, int id);
+ void close_file() {
+ fclose(this->file);
}
+ const std::string currentDateTime();
};
#endif /* PRINT_IPRINTER_H_ */
diff --git a/src/print/MariaPrinter.h b/src/print/MariaPrinter.h
deleted file mode 100644
index e73c4ae..0000000
--- a/src/print/MariaPrinter.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * MariaPrinter.h
- *
- * Created on: Sep 4, 2015
- * Author: fsedlaze
- */
-
-#include "IPrinter.h"
-
-
-
-class MariaPrinter: public IPrinter{
-private:
- void print_header();
- void print_body(Breakpoint * &SV, RefVector ref);
-
-public:
- MariaPrinter(){
-
- }
- ~MariaPrinter(){
-
- }
-};
diff --git a/src/print/VCFPrinter.cpp b/src/print/VCFPrinter.cpp
index 6b2d98e..04bb45c 100644
--- a/src/print/VCFPrinter.cpp
+++ b/src/print/VCFPrinter.cpp
@@ -9,7 +9,10 @@
void VCFPrinter::print_header() {
fprintf(file, "%s", "##fileformat=VCFv4.1\n");
- fprintf(file, "%s", "##fileDate=20150221\n"); //TODO change!
+ //string time = currentDateTime();
+ fprintf(file, "%s", "##fileDate=");
+// fprintf(file, "%s", time.c_str());
+ fprintf(file, "%s", "\n"); //TODO change!
fprintf(file, "%s", "##ALT=<ID=DEL,Description=\"Deletion\">\n");
fprintf(file, "%s", "##ALT=<ID=DUP,Description=\"Duplication\">\n");
fprintf(file, "%s", "##ALT=<ID=INV,Description=\"Inversion\">\n");
@@ -29,10 +32,10 @@ void VCFPrinter::print_header() {
fprintf(file, "%s", "##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise structural variation\">\n");
fprintf(file, "%s", "##INFO=<ID=PRECISE,Number=0,Type=Flag,Description=\"Precise structural variation\">\n");
fprintf(file, "%s", "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of the SV\">\n");
- fprintf(file, "%s", "##INFO=<ID=SUPTYPE,Number=1,Type=String,Description=\"What supports the SV.\">\n");
- fprintf(file, "%s", "##INFO=<SUPTYPE=SP,Description=\"SV supported by split reads\">\n");
- fprintf(file, "%s", "##INFO=<SUPTYPE=MD,Description=\"SV supported by MD string\">\n");
- fprintf(file, "%s", "##INFO=<SUPTYPE=CI,Description=\"SV supported by Cigar string\">\n");
+// fprintf(file, "%s", "##INFO=<ID=SUBTYPE,Number=1,Type=String,Description=\"What supports the SV.\">\n");
+// fprintf(file, "%s", "##INFO=<SUBTYPE=SP,Description=\"SV supported by split reads\">\n");
+// fprintf(file, "%s", "##INFO=<SUBTYPE=MD,Description=\"SV supported by MD string\">\n");
+// fprintf(file, "%s", "##INFO=<SUBTYPE=CI,Description=\"SV supported by Cigar string\">\n");
//fprintf(file, "%s", "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n");
fprintf(file, "%s", "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">\n");
fprintf(file, "%s", "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n");
@@ -41,7 +44,9 @@ void VCFPrinter::print_header() {
//fprintf(file, "%s", "##FORMAT=<ID=FT,Number=1,Type=String,Description=\"Per-sample genotype filter\">\n");
// fprintf(file, "%s", "##FORMAT=<ID=RC,Number=1,Type=Integer,Description=\"Normalized high-quality read count for the SV\">\n");
//fprintf(file, "%s", "##FORMAT=<ID=DR,Number=1,Type=Integer,Description=\"# high-quality reference pairs\">\n");
- fprintf(file, "%s", "##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"# high-quality variant pairs\">\n");
+ fprintf(file, "%s", "##FORMAT=<ID=DR,Number=1,Type=Integer,Description=\"# high-quality reference reads\">\n");
+ fprintf(file, "%s", "##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"# high-quality variant reads\">\n");
+
//fprintf(file, "%s", "##FORMAT=<ID=RR,Number=1,Type=Integer,Description=\"# high-quality reference junction reads\">\n");
//fprintf(file, "%s", "##FORMAT=<ID=RV,Number=1,Type=Integer,Description=\"# high-quality variant junction reads\">\n");
fprintf(file, "%s", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
@@ -53,6 +58,11 @@ void VCFPrinter::print_header() {
}
void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
if (!this->bed_tree.is_in(SV->get_coordinates().start.most_support, this->root) && !this->bed_tree.is_in(SV->get_coordinates().stop.most_support, this->root)) {
+ //temp. store read names supporting this SVs to later group the SVs together.
+
+ if (Parameter::Instance()->phase) {
+ store_readnames(SV->get_read_ids(),id);
+ }
std::string chr;
int pos = IPrinter::calc_pos(SV->get_coordinates().start.most_support, ref, chr);
fprintf(file, "%s", chr.c_str());
@@ -67,8 +77,22 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
pos = IPrinter::calc_pos(SV->get_coordinates().stop.most_support, ref, chr);
fprintf(file, "%s", chr.c_str());
fprintf(file, "%s", ";END=");
- fprintf(file, "%i", pos);
- fprintf(file, "%s", ";SUPTYPE=");
+ if (SV->get_SVtype() & INS) {
+ fprintf(file, "%i", pos+SV->get_length());
+ }else{
+ fprintf(file, "%i", pos);
+ }
+ if (Parameter::Instance()->debug) {
+ std::vector< std::string> names=SV->get_read_names(Parameter::Instance()->report_n_reads);
+ fprintf(file, "%s", ";RNAMES=");
+ for(size_t i=0;i<names.size()&& i<Parameter::Instance()->report_n_reads;i++){
+ fprintf(file, "%s", names[i].c_str());
+ fprintf(file, "%s", ";");
+ }
+ }else{
+ fprintf(file, "%s", ";");
+ }
+ fprintf(file, "%s", "SUPTYPE=");
fprintf(file, "%s", SV->get_supporting_types().c_str());
fprintf(file, "%s", ";SVLEN=");
fprintf(file, "%i", SV->get_length());
@@ -76,7 +100,7 @@ void VCFPrinter::print_body(Breakpoint * &SV, RefVector ref) {
fprintf(file, "%s", SV->get_strand(1).c_str());
fprintf(file, "%s", ";RE=");
fprintf(file, "%i", SV->get_support());
- fprintf(file, "%s", "\tGT:DV\t./.:");
+ fprintf(file, "%s", "\tGT:DR:DV\t./.:");
fprintf(file, "%i", SV->get_support());
fprintf(file, "%c", '\n');
}
diff --git a/src/sub/Breakpoint.cpp b/src/sub/Breakpoint.cpp
index 3c232b1..ced2d66 100644
--- a/src/sub/Breakpoint.cpp
+++ b/src/sub/Breakpoint.cpp
@@ -11,8 +11,166 @@
* Created on: Jun 23, 2015
* Author: fsedlaze
*/
+#include "../print/IPrinter.h"
#include "Breakpoint.h"
+///////////////////////////////// MERGING////////////////////////////////////////////
+bool Breakpoint::check_SVtype(Breakpoint * break1, Breakpoint * break2) { //todo check that!
+ char SV1 = (*break1->get_coordinates().support.begin()).second.SV;
+ char SV2 = (*break2->get_coordinates().support.begin()).second.SV;
+ //we have to check it that way,because we can have multiple types!
+ if ((SV1 & INS) && (SV2 & INS)) {
+ return true;
+ } else if ((SV1 & INV) && (SV2 & INV)) {
+ return true;
+ } else if ((SV1 & TRA) && (SV2 & TRA)) {
+ return true;
+ } else if ((SV1 & DUP) && (SV2 & DUP)) {
+ return true;
+ } else if ((SV1 & DEL) && (SV2 & DEL)) {
+ return true;
+ } /*else if (((SV1 & DUP) && (SV2 & INS)) || ((SV1 & INS) && (SV2 & DUP))) {
+ return true;
+ }*/
+ return false;
+}
+bool Breakpoint::is_same_strand(Breakpoint * tmp) {
+ //todo check for same SVtype? except for INV+DEL
+ if (check_SVtype(tmp, this)) {
+ if ((*tmp->get_coordinates().support.begin()).second.SV & TRA) { //only for tra since we get otherwise a problem with the cigar events
+ return ((*tmp->get_coordinates().support.begin()).second.strand.first == (*this->get_coordinates().support.begin()).second.strand.first && (*tmp->get_coordinates().support.begin()).second.strand.second == (*this->get_coordinates().support.begin()).second.strand.second);
+ }
+ return true;
+ }
+ return false;
+}
+int get_dist(Breakpoint * tmp) {
+ position_str pos = tmp->get_coordinates();
+ //return Parameter::Instance()->max_dist;
+ if ((*tmp->get_coordinates().support.begin()).second.SV & TRA || (*tmp->get_coordinates().support.begin()).second.SV & INS) {
+ return Parameter::Instance()->max_dist; //TODO: change!
+ } else {
+ int max_val = Parameter::Instance()->max_dist;// * 0.5; //TOOD maybe 0.5?
+ return std::max(std::min((int) (pos.stop.max_pos - pos.start.min_pos) / 80, Parameter::Instance()->max_dist), max_val); //20% of the lenght of the SV
+ }
+}
+
+long Breakpoint::overlap(Breakpoint * tmp) {
+
+ int max_dist = get_dist(tmp);
+ //std::cout<<"\tOverlap: "<<max_dist<< " start: "<<abs(tmp->get_coordinates().start.min_pos - positions.stop.min_pos) << " stop :" <<abs(tmp->get_coordinates().stop.max_pos - positions.start.max_pos);
+
+ //check type. ALN could be part of the SPlit read event! Not merge two split reads!
+ if (is_same_strand(tmp) && (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < max_dist && abs(tmp->get_coordinates().stop.max_pos - positions.stop.max_pos) < max_dist)) {
+// std::cout << "\tfound hit!" << std::endl;
+ return 0;
+ }
+
+ //TODO:: if one of the two SVs is based on ALN it might be good to join them: to merge noisy flanking regions
+ if (((tmp->get_types().is_ALN || this->get_types().is_ALN) && !(tmp->get_types().is_ALN && this->get_types().is_ALN)) && (abs(tmp->get_coordinates().start.min_pos - positions.stop.min_pos) < max_dist/2 || abs(tmp->get_coordinates().stop.max_pos - positions.start.max_pos) < max_dist/2)) { //TODO maybe add SV type check!
+// std::cout << "\t hit!" << std::endl;
+ return 0;
+ }
+
+ //std::cout << "no hit? "<< std::endl;
+//as abstraction lets try the start+stop coordinate!
+ long diff=(tmp->get_coordinates().start.min_pos - positions.start.min_pos);
+ if(abs(diff)<max_dist){
+ return (tmp->get_coordinates().stop.max_pos - positions.stop.max_pos);
+ }
+ return diff;// + (tmp->get_coordinates().stop.max_pos - positions.stop.max_pos);
+}
+
+void Breakpoint::add_read(Breakpoint * point) {
+ if (point != NULL) {
+ if ((*point->get_coordinates().support.begin()).second.type == 0) {
+ this->type.is_ALN = true;
+ }
+ if ((*point->get_coordinates().support.begin()).second.type == 1) {
+ this->type.is_SR = true;
+ }
+
+ if ((point->get_types().is_ALN || this->get_types().is_ALN) && !(point->get_types().is_ALN && this->get_types().is_ALN)) { //
+ this->type.is_ALN = false; //TODO trick ;)
+ //THis is to merger noisy flanking regions:
+ if (abs(point->get_coordinates().start.min_pos - positions.stop.min_pos) < Parameter::Instance()->max_dist / 2) { //left of the current position
+ //start stays; stop changes
+ this->positions.stop.min_pos = positions.stop.min_pos;
+ this->positions.stop.max_pos = positions.stop.max_pos;
+ //Now we make the stop positions invalid:
+ std::map<std::string, read_str> support = this->positions.support;
+ for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+ (*i).second.coordinates.second = -1;
+ }
+ support = point->get_coordinates().support;
+ for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+ (*i).second.coordinates.second = -1;
+ }
+ } else if (abs(point->get_coordinates().stop.max_pos - positions.start.max_pos) < Parameter::Instance()->max_dist / 2) { //right of the current position
+ // cout << "right " << endl;
+ //stop stays; start changes:
+ this->positions.start.min_pos = positions.start.min_pos;
+ this->positions.start.max_pos = positions.start.max_pos;
+
+ //Now we make the start positions invalid:
+ std::map<std::string, read_str> support = this->positions.support;
+ for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+ (*i).second.coordinates.first = -1;
+ }
+ support = point->get_coordinates().support;
+ for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+ (*i).second.coordinates.first = -1;
+ }
+ }
+ } else {
+ this->positions.start.min_pos = min(this->positions.start.min_pos, point->get_coordinates().start.min_pos);
+ this->positions.start.max_pos = max(this->positions.start.max_pos, point->get_coordinates().start.max_pos);
+
+ if (point->get_coordinates().support.begin()->second.SV & INS) {
+ this->positions.stop.min_pos = this->positions.start.max_pos;
+ this->positions.stop.max_pos = this->positions.start.max_pos;
+ } else {
+ this->positions.stop.min_pos = min(this->positions.stop.min_pos, point->get_coordinates().stop.min_pos);
+ this->positions.stop.max_pos = max(this->positions.stop.max_pos, point->get_coordinates().stop.max_pos);
+ }
+ }
+ bool flag = false;
+ std::map<std::string, read_str> support = point->get_coordinates().support;
+ for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+ if (this->positions.support[(*i).first].SV & INS) {
+ flag = true;
+ }
+ this->positions.support[(*i).first] = (*i).second;
+ }
+ if (flag) {
+ this->length = (this->length + point->get_length()) / 2;
+ }
+ }
+}
+
+///////////////////////////////// MERGING////////////////////////////////////////////
+std::vector<std::string> Breakpoint::get_read_names(int maxim) {
+ std: vector<std::string> read_names;
+ std::map<std::string, read_str> support = this->positions.support;
+ int num = 0;
+ for (std::map<std::string, read_str>::iterator i = support.begin(); (num < maxim || maxim == -1) && i != support.end(); i++) {
+ read_names.push_back((*i).first);
+ num++;
+ }
+ return read_names;
+}
+
+std::vector<int> Breakpoint::get_read_ids() {
+ std: vector<int> read_names;
+ std::map<std::string, read_str> support = this->positions.support;
+ int num = 0;
+ for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
+ read_names.push_back((*i).second.id);
+ num++;
+ }
+ return read_names;
+}
+
//TODO define region object and inherit from that. Plus define avoid region objects for mappability problems.
std::string Breakpoint::translate_strand(pair<bool, bool> strand_pair) {
@@ -58,8 +216,9 @@ void Breakpoint::summarize_type(char SV, std::vector<short>& array) {
}
char Breakpoint::get_SVtype() {
+
if (sv_type == ' ') {
- std::cout << "was not set" << std::endl;
+ std::cerr << "was not set" << std::endl;
calc_support();
predict_SV();
}
@@ -67,6 +226,7 @@ char Breakpoint::get_SVtype() {
}
void Breakpoint::calc_support() {
+ //if (positions.support.size() > Parameter::Instance()->min_support) {
std::vector<short> SV;
for (size_t i = 0; i < 5; i++) {
SV.push_back(0);
@@ -77,92 +237,91 @@ void Breakpoint::calc_support() {
}
//given the majority type get the stats:
this->sv_type = eval_type(SV);
+ //}
}
-char Breakpoint::eval_type(std::vector<short> SV) {
- std::stringstream ss;
-
- if (SV[0] != 0) {
- ss << " DEL(";
- ss << SV[0];
- ss << ")";
- }
- if (SV[1] != 0) {
- ss << " DUP(";
- ss << SV[1];
- ss << ")";
- }
- if (SV[2] != 0) {
- ss << " INS(";
- ss << SV[2];
- ss << ")";
- }
- if (SV[3] != 0) {
- ss << " INV(";
- ss << SV[3];
- ss << ")";
- }
- if (SV[4] != 0) {
- ss << " TRA(";
- ss << SV[4];
- ss << ")";
- }
- this->sv_debug = ss.str(); //only for debug!
- //std::cout << sv_debug << std::endl;
+char Breakpoint::eval_type(std::vector<short> SV) {
+ /* std::stringstream ss;
+ if (SV[0] != 0) {
+ ss << " DEL(";
+ ss << SV[0];
+ ss << ")";
+ }
+ if (SV[1] != 0) {
+ ss << " DUP(";
+ ss << SV[1];
+ ss << ")";
+ }
+ if (SV[2] != 0) {
+ ss << " INS(";
+ ss << SV[2];
+ ss << ")";
+ }
+ if (SV[3] != 0) {
+ ss << " INV(";
+ ss << SV[3];
+ ss << ")";
+ }
+ if (SV[4] != 0) {
+ ss << " TRA(";
+ ss << SV[4];
+ ss << ")";
+ }
+ this->sv_debug = ss.str(); //only for debug!
+ std::cout << sv_debug << std::endl;
+ */
int maxim = 0;
int id = 0;
for (size_t i = 0; i < SV.size(); i++) {
if (maxim < SV[i]) {
maxim = SV[i];
- id = i;
- } else if (maxim == SV[i]) {
- id = -1;
}
}
this->type_support = maxim;
- switch (id) {
- case 0:
- return DEL;
- break;
- case 1:
- return DUP;
- break;
- case 2:
- return INS;
- break;
- case 3:
- return INV;
- break;
- case 4:
- return TRA;
- break;
+ char max_SV = 0;
+ if (maxim == SV[0]) {
+ max_SV |= DEL;
}
- return 'n';
-}
-
-long Breakpoint::overlap(Breakpoint * tmp) {
- /*if ((*this->positions.support.begin()).second.SV & INS) {
- std::cout << "\tCurr: " << this->positions.start.min_pos << " " << this->positions.stop.max_pos << endl;
- std::cout << "\tNEW: " << tmp->get_coordinates().start.min_pos << " " << tmp->get_coordinates().stop.max_pos << endl;
- }*/
- if (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < Parameter::Instance()->max_dist && abs(tmp->get_coordinates().stop.max_pos - positions.stop.max_pos) < Parameter::Instance()->max_dist) {
- /*if ((*this->positions.support.begin()).second.SV & INS) {
- std::cout << "\tSAME" << std::endl;
- }*/
- return 0;
+ if (maxim == SV[1]) {
+ max_SV |= DUP;
}
- /*else {
- if ((*this->positions.support.begin()).second.SV & INS) {
- std::cout << "\tDiff" << std::endl;
- }
- }*/
-//as abstraction lets try the start+stop coordinate!
- return (tmp->get_coordinates().start.min_pos - positions.start.min_pos) + (tmp->get_coordinates().stop.max_pos - positions.stop.max_pos);
+ if (maxim == SV[2]) {
+ max_SV |= INS;
+ }
+ if (maxim == SV[3]) {
+ max_SV |= INV;
+ }
+ if (maxim == SV[4]) {
+ max_SV |= TRA;
+ }
+
+ return max_SV;
}
+/*void trans_strand(pair<bool, bool> tmp) {
+ if (tmp.first) {
+ std::cout << "+";
+ } else {
+ std::cout << "-";
+ }
+
+ if (tmp.second) {
+ std::cout << "+";
+ } else {
+ std::cout << "-";
+ }
+ std::cout << std::endl;
+ }*/
+
+/*
+ char direction(bool dir) {
+ if (dir) {
+ return '+';
+ }
+ return '-';
+ }*/
void Breakpoint::predict_SV() {
- bool md = false;
- bool cigar = false;
+ bool aln = false;
bool split = false;
int num = 0;
std::map<long, int> starts;
@@ -171,31 +330,35 @@ void Breakpoint::predict_SV() {
for (std::map<std::string, read_str>::iterator i = positions.support.begin(); i != positions.support.end(); i++) {
if ((*i).second.SV & this->sv_type) { ///check type
- if (starts.find((*i).second.coordinates.first) == starts.end()) {
- starts[(*i).second.coordinates.first] = 1;
- } else {
- starts[(*i).second.coordinates.first]++;
+ //cout << "Hit" << endl;
+ if ((*i).second.coordinates.first != -1) {
+ if (starts.find((*i).second.coordinates.first) == starts.end()) {
+ starts[(*i).second.coordinates.first] = 1;
+ } else {
+ starts[(*i).second.coordinates.first]++;
+ }
}
-
- if (stops.find((*i).second.coordinates.second) == stops.end()) {
- stops[(*i).second.coordinates.second] = 1;
- } else {
- stops[(*i).second.coordinates.second]++;
+ if ((*i).second.coordinates.second != -1) { //TODO test
+ if (stops.find((*i).second.coordinates.second) == stops.end()) {
+ stops[(*i).second.coordinates.second] = 1;
+ } else {
+ stops[(*i).second.coordinates.second]++;
+ }
}
+ if (!((*i).second.type == 0 && ((*i).second.SV & INV))) {
- std::string tmp = translate_strand((*i).second.strand); //std::string tmp=
- //std::cout << tmp << std::endl;
- if (strands.find(tmp) == strands.end()) {
- strands[tmp] = 1;
- } else {
- strands[tmp]++;
- }
+ std::string tmp = translate_strand((*i).second.strand);
+ //std::cout << tmp << std::endl;
+ if (strands.find(tmp) == strands.end()) {
+ strands[tmp] = 1;
+ } else {
+ strands[tmp]++;
+ }
+ }
if ((*i).second.type == 0) {
- cigar = true;
+ aln = true;
} else if ((*i).second.type == 1) {
- md = true;
- } else if ((*i).second.type == 2) {
split = true;
} else {
std::cerr << "Type " << (*i).second.type << std::endl;
@@ -204,6 +367,8 @@ void Breakpoint::predict_SV() {
}
}
+ long mean = 0;
+ long counts = 0;
int maxim = 0;
long coord = 0;
for (map<long, int>::iterator i = starts.begin(); i != starts.end(); i++) {
@@ -212,27 +377,33 @@ void Breakpoint::predict_SV() {
coord = (*i).first;
maxim = (*i).second;
}
+ mean += ((*i).first * (*i).second);
+ counts += (*i).second;
+ }
+ if (maxim < 5) {
+ this->positions.start.most_support = mean / counts;
+ } else {
+ this->positions.start.most_support = coord;
}
- //std::cout<<coord<<"\t"<<maxim<<endl;
- //if(this->sv_type & INV){
- // std::cout<<"Break: INV: "<<coord;
- //}
- this->positions.start.most_support = coord;
maxim = 0;
coord = 0;
+ mean = 0;
+ counts = 0;
for (map<long, int>::iterator i = stops.begin(); i != stops.end(); i++) {
if ((*i).second > maxim) {
coord = (*i).first;
maxim = (*i).second;
}
+ mean += ((*i).first * (*i).second);
+ counts += (*i).second;
+ }
+ if (maxim < 5) {
+ this->positions.stop.most_support = mean / counts;
+ } else {
+ this->positions.stop.most_support = coord;
}
- this->positions.stop.most_support = coord;
-
if (!(this->get_SVtype() & INS)) {
- if (this->get_SVtype() & INS) {
- std::cout << " ERROR " << std::endl;
- }
this->length = this->positions.stop.most_support - this->positions.start.most_support;
}
starts.clear();
@@ -256,14 +427,8 @@ void Breakpoint::predict_SV() {
strands.clear();
this->supporting_types = "";
- if (md) {
- this->supporting_types += "MD";
- }
- if (cigar) {
- if (!supporting_types.empty()) {
- this->supporting_types += ",";
- }
- this->supporting_types += "CI";
+ if (aln) {
+ this->supporting_types += "AL";
}
if (split) {
if (!supporting_types.empty()) {
@@ -271,70 +436,6 @@ void Breakpoint::predict_SV() {
}
this->supporting_types += "SR";
}
- //this->strand = eval_strand(strand);
-}
-
-std::string Breakpoint::to_string(RefVector ref) {
-
- std::stringstream ss;
- ss << "(";
- ss << get_chr(get_coordinates().start.min_pos, ref);
- ss << ":";
- ss << calc_pos(get_coordinates().start.min_pos, ref);
- ss << "-";
- ss << get_chr(get_coordinates().stop.max_pos, ref);
- ss << ":";
- ss << calc_pos(get_coordinates().stop.max_pos, ref);
- ss << " ";
- ss << positions.support.size();
- ss << " ";
- ss << this->sv_debug;
- ss << " ";
- ss << this->get_strand(2);
- ss << "\n";
- int num = 0;
- for (std::map<std::string, read_str>::iterator i = positions.support.begin(); i != positions.support.end() && num < Parameter::Instance()->report_n_reads; i++) {
- ss << "\t";
- ss << (*i).first;
- ss << " ";
- ss << (*i).second.type;
- if ((*i).second.strand.first) {
- ss << "+";
- } else {
- ss << "-";
- }
- if ((*i).second.strand.second) {
- ss << "+";
- } else {
- ss << "-";
- }
- num++;
- ss << "\n";
- }
- ss << " ";
- return ss.str();
-}
-
-void Breakpoint::add_read(Breakpoint * point) {
- if (point != NULL) {
-
- this->positions.start.min_pos = min(this->positions.start.min_pos, point->get_coordinates().start.min_pos);
- this->positions.start.max_pos = max(this->positions.start.max_pos, point->get_coordinates().start.max_pos);
-
- this->positions.stop.min_pos = min(this->positions.stop.min_pos, point->get_coordinates().stop.min_pos);
- this->positions.stop.max_pos = max(this->positions.stop.max_pos, point->get_coordinates().stop.max_pos);
- bool flag = false;
- std::map<std::string, read_str> support = point->get_coordinates().support;
- for (std::map<std::string, read_str>::iterator i = support.begin(); i != support.end(); i++) {
- if (this->positions.support[(*i).first].SV & INS) {
- flag = true;
- }
- this->positions.support[(*i).first] = (*i).second;
- }
- if (flag) {
- this->length = (this->length + point->get_length()) / 2;
- }
- }
}
std::string Breakpoint::get_chr(long pos, RefVector ref) {
@@ -390,14 +491,17 @@ std::string Breakpoint::rev_complement(std::string seq) {
}
std::string Breakpoint::get_strand(int num_best) {
- //if(this->strand.empty()){
- // predict_SV();
- //}
+//if(this->strand.empty()){
+// predict_SV();
+//}
if (sv_type == ' ') {
// std::cout<<"was not set"<<std::endl;
calc_support();
predict_SV();
}
+ if (this->strand.empty()) {
+ return "UNDEF";
+ }
std::string tmp = this->strand[0];
for (int i = 1; i < num_best; i++) {
tmp += '\t';
@@ -409,4 +513,59 @@ std::string Breakpoint::get_strand(int num_best) {
}
return tmp;
}
+#include "Detect_Breakpoints.h"
+std::string Breakpoint::to_string(){
+ std::stringstream ss;
+ ss << "\t\tTREE: ";
+ ss << TRANS_type(this->get_SVtype());
+ ss << " ";
+ ss << get_coordinates().start.min_pos;
+ ss << ":";
+ ss << get_coordinates().stop.max_pos;
+ ss << " ";
+ ss << positions.support.size();
+ ss << " ";
+ ss << get_strand(2);
+ return ss.str();
+}
+std::string Breakpoint::to_string(RefVector ref) {
+
+ std::stringstream ss;
+ ss << "(";
+ ss << get_chr(get_coordinates().start.min_pos, ref);
+ ss << ":";
+ ss << calc_pos(get_coordinates().start.min_pos, ref);
+ ss << "-";
+ ss << get_chr(get_coordinates().stop.max_pos, ref);
+ ss << ":";
+ ss << calc_pos(get_coordinates().stop.max_pos, ref);
+ ss << " ";
+ ss << positions.support.size();
+ ss << " ";
+ ss << this->sv_debug;
+ ss << " ";
+ ss << this->get_strand(2);
+ ss << "\n";
+ int num = 0;
+ for (std::map<std::string, read_str>::iterator i = positions.support.begin(); i != positions.support.end() && num < Parameter::Instance()->report_n_reads; i++) {
+ ss << "\t";
+ ss << (*i).first;
+ ss << " ";
+ ss << (*i).second.type;
+ if ((*i).second.strand.first) {
+ ss << "+";
+ } else {
+ ss << "-";
+ }
+ if ((*i).second.strand.second) {
+ ss << "+";
+ } else {
+ ss << "-";
+ }
+ num++;
+ ss << "\n";
+ }
+ ss << " ";
+ return ss.str();
+}
diff --git a/src/sub/Breakpoint.h b/src/sub/Breakpoint.h
index fc6e902..ce27f73 100644
--- a/src/sub/Breakpoint.h
+++ b/src/sub/Breakpoint.h
@@ -13,15 +13,11 @@
#include <iostream>
#include <sstream>
#include <math.h>
+#include <vector>
#include "../Paramer.h"
#include "../BamParser.h"
#include "../tree/BinTree.h"
-const unsigned char DEL = 0x01; // hex for 0000 0001
-const unsigned char DUP = 0x02; // hex for 0000 0010
-const unsigned char INS = 0x04; // hex for 0000 0100
-const unsigned char INV = 0x08; // hex for 0000 1000
-const unsigned char TRA = 0x10; // hex for 0001 0000
struct region_ref_str{ //not very nice!
std::string read_seq;
@@ -40,6 +36,7 @@ struct svs_breakpoint_str{
struct read_str {
//to identify
std::string name;
+ long id;
region_ref_str aln; //maybe we can use this!
short type; //split reads, cigar or md string
//for later assessment:
@@ -60,9 +57,15 @@ struct position_str {
};
+struct str_types{
+ bool is_SR;
+ bool is_ALN;
+};
+
//TODO define region object and inherit from that. Plus define avoid region objects for mappability problems.
class Breakpoint {
private:
+ str_types type;
position_str positions;
std::vector<std::string> strand;
std::string supporting_types;
@@ -70,7 +73,6 @@ private:
std::string sv_debug;
std::string ref_seq;
std::vector<short> support;
- double cov;
short type_support;
//for phasing:
BinTree grouped;
@@ -85,17 +87,17 @@ private:
std::string rev_complement(std::string seq);
bool is_in(short id);
std::string translate_strand(pair<bool, bool> strand);
+ bool is_same_strand(Breakpoint * tmp);
+ bool check_SVtype(Breakpoint * break1, Breakpoint * break2);
public:
- Breakpoint(position_str sv, int cov,long len) {
+ Breakpoint(position_str sv,long len) {
sv_type=' ';
+ type.is_ALN=((*sv.support.begin()).second.type==0);
+ type.is_SR=((*sv.support.begin()).second.type==1);
type_support=-1;
this->positions = sv;
- //std::cout << "Break1: " << positions.start << " " << positions.stop << std::endl;
-
- //std::cout << "Break2: " << positions.start << " " << positions.stop<< std::endl;
- this->cov = cov;
this->grouped_node=NULL;
- this->set_length(len);
+ this->length=len;
}
~Breakpoint() {
@@ -111,9 +113,6 @@ public:
void add_read(Breakpoint * point);
- double get_cov() {
- return cov;
- }
std::string get_chr(long pos, RefVector ref);
long calc_pos(long pos, RefVector ref);
char get_SVtype();
@@ -144,6 +143,12 @@ public:
return tmp;
}
void calc_support();
+ str_types get_types(){
+ return this->type;
+ }
+ std::vector< std::string> get_read_names(int num);
+ std::vector<int> get_read_ids();
+ std::string to_string();
};
#endif /* SUB_BREAKPOINT_H_ */
diff --git a/src/sub/Detect_Breakpoints.cpp b/src/sub/Detect_Breakpoints.cpp
index 87002bf..43b672e 100644
--- a/src/sub/Detect_Breakpoints.cpp
+++ b/src/sub/Detect_Breakpoints.cpp
@@ -6,44 +6,77 @@
*/
#include "Detect_Breakpoints.h"
-#include "../tree/IntervallTree.h"
-#include "../tree/TNode.h"
+std::string TRANS_type(char type) {
+ string tmp;
+ if (type & DEL) {
+ tmp += "DEL";
+ }
+ if (type & INV) {
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "INV";
+ }
+ if (type & DUP) {
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "DUP";
+ }
+ if (type & INS) {
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "INS";
+ }
+ if (type & TRA) {
+ if (!tmp.empty()) {
+ tmp += '/';
+ }
+ tmp += "TRA";
+ }
+
+ return tmp; // should not occur!
+}
long get_ref_lengths(int id, RefVector ref) {
long length = 0;
for (size_t i = 0; i < (size_t) id && i < ref.size(); i++) {
- length += ref[i].RefLength + Parameter::Instance()->max_dist;
+ length += (long) ref[i].RefLength + (long) Parameter::Instance()->max_dist;
}
return length;
}
bool should_be_stored(Breakpoint *& point) {
- point->calc_support();
- if (point->get_SVtype() & TRA) {
+ point->calc_support(); // we need that before:
+ if (point->get_SVtype() & TRA) { // we cannot make assumptions abut support yet.
return (point->get_support() > 2); // this is needed as we take each chr independently and just look at the primary alignment
- } else {
+ } else if (point->get_support() > Parameter::Instance()->min_support) {
point->predict_SV();
return (point->get_support() > Parameter::Instance()->min_support && point->get_length() > Parameter::Instance()->min_length);
}
+
+ return false;
}
-void flush_tree(IntervallTree & local_tree, TNode *& local_root, IntervallTree & final, TNode *& root_final, long pos) {
- IntervallTree tmp_tree;
- TNode *tmp_root = NULL;
- std::vector<Breakpoint *> points;
- local_tree.get_breakpoints(local_root, points);
- clarify(points);
- for (int i = 0; i < points.size(); i++) {
- if (abs(pos - points[i]->get_coordinates().start.min_pos) < 100000 || abs(pos - points[i]->get_coordinates().stop.max_pos) < 100000) { //TODO arbitrary threshold!
- tmp_tree.insert(points[i], tmp_root);
- } else if (points[i]->get_support() > Parameter::Instance()->min_support && points[i]->get_length() > Parameter::Instance()->min_length) {
- final.insert(points[i], root_final);
- }
- }
- local_tree.makeempty(local_root);
- local_tree = tmp_tree;
- local_root = tmp_root;
-}
+/*
+ void flush_tree(IntervallTree & local_tree, TNode *& local_root, IntervallTree & final, TNode *& root_final, long pos) {
+ IntervallTree tmp_tree;
+ TNode *tmp_root = NULL;
+ std::vector<Breakpoint *> points;
+ local_tree.get_breakpoints(local_root, points);
+ clarify(points);
+ for (int i = 0; i < points.size(); i++) {
+ if (abs(pos - points[i]->get_coordinates().start.min_pos) < 100000 || abs(pos - points[i]->get_coordinates().stop.max_pos) < 100000) { //TODO arbitrary threshold!
+ tmp_tree.insert(points[i], tmp_root);
+ } else if (points[i]->get_support() > Parameter::Instance()->min_support && points[i]->get_length() > Parameter::Instance()->min_length) {
+ final.insert(points[i], root_final);
+ }
+ }
+ local_tree.makeempty(local_root);
+ local_tree = tmp_tree;
+ local_root = tmp_root;
+ }*/
void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
estimate_parameters(read_filename);
@@ -57,8 +90,8 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
exit(0);
}
//Using PlaneSweep to comp coverage and iterate through reads:
- //PlaneSweep * sweep = new PlaneSweep();
- std::cout << "start parsing..." << std::endl;
+//PlaneSweep * sweep = new PlaneSweep();
+ std::cout << "Start parsing..." << std::endl;
//Using Interval tree to store and manage breakpoints:
IntervallTree final;
@@ -67,19 +100,31 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
IntervallTree bst;
TNode *root = NULL;
-
+ //FILE * alt_allel_reads;
+ FILE * ref_allel_reads;
+ if (Parameter::Instance()->genotype) {
+ std::string output = Parameter::Instance()->tmp_file.c_str();
+ output += "ref_allele";
+ ref_allel_reads = fopen(output.c_str(), "wb");
+
+// output = Parameter::Instance()->tmp_file.c_str();
+ //output += "alt_allele";
+ //alt_allel_reads = fopen(output.c_str(), "wb");
+ }
Alignment * tmp_aln = mapped_file->parseRead(Parameter::Instance()->min_mq);
long ref_space = get_ref_lengths(tmp_aln->getRefID(), ref);
- while (!tmp_aln->getSequence().first.empty()) {
-
+ std::string prev = "test";
+ std::string curr = "wtf";
+ long num_reads = 0;
+ while (!tmp_aln->getQueryBases().empty()) {
if ((tmp_aln->getAlignment()->IsPrimaryAlignment()) && (!(tmp_aln->getAlignment()->AlignmentFlag & 0x800) && tmp_aln->get_is_save())) {
//flush_tree(local_tree, local_root, final, root_final);
if (current_RefID != tmp_aln->getRefID()) { // Regular scan through the SV and move those where the end point lies far behind the current pos or reads. Eg. 1MB?
current_RefID = tmp_aln->getRefID();
ref_space = get_ref_lengths(tmp_aln->getRefID(), ref);
- std::cout << "Switch Chr " << ref[tmp_aln->getRefID()].RefName << " " << ref[tmp_aln->getRefID()].RefLength << std::endl;
+ std::cout << "\tSwitch Chr " << ref[tmp_aln->getRefID()].RefName << " " << ref[tmp_aln->getRefID()].RefLength << std::endl;
std::vector<Breakpoint *> points;
- // clarify(points);
+ // clarify(points);
bst.get_breakpoints(root, points);
for (int i = 0; i < points.size(); i++) {
if (should_be_stored(points[i])) {
@@ -92,70 +137,110 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
}
bst.makeempty(root);
}
- std::vector<str_event> cigar_event;
- std::vector<str_event> md_event;
+ std::vector<str_event> aln_event;
std::vector<aln_str> split_events;
-
if (tmp_aln->getMappingQual() > Parameter::Instance()->min_mq) {
+ double score = tmp_aln->get_scrore_ratio();
#pragma omp parallel // starts a new team
{
#pragma omp sections
{
{
- //if (Parameter::Instance()->useMD_CIGAR) {
- cigar_event = tmp_aln->get_events_CIGAR();
- //}
- }
-#pragma omp section
- {
- //if (Parameter::Instance()->useMD_CIGAR) {
- md_event = tmp_aln->get_events_MD(20);
- //}
+ // clock_t begin = clock();
+ if ((score == -1 || score > Parameter::Instance()->score_treshold)) {
+ aln_event = tmp_aln->get_events_Aln();
+ }
+ // Parameter::Instance()->meassure_time(begin, " Alignment ");
}
#pragma omp section
{
+ //TODO ignore Splits that are shorter then XYbp??
+ // clock_t begin_split = clock();
split_events = tmp_aln->getSA(ref);
+ // Parameter::Instance()->meassure_time(begin_split," Split reads ");
}
}
}
- tmp_aln->set_supports_SV((cigar_event.empty() && md_event.empty()) && split_events.empty());
-
- //sweep->add_read(tmp_aln);
-
- //maybe flush the tree after each chr.....?
+ //tmp_aln->set_supports_SV(aln_event.empty() && split_events.empty());
+
+ str_read tmp;
+ tmp.SV_support = !(aln_event.empty() && split_events.empty());
+ if ((Parameter::Instance()->genotype && !tmp.SV_support) && (score == -1 || score > Parameter::Instance()->score_treshold)) {
+ //write read:
+ tmp.chr = ref[tmp_aln->getRefID()].RefName;
+ tmp.start = tmp_aln->getPosition();
+ tmp.length = tmp_aln->getRefLength();
+ tmp.SV_support = false;
+ fwrite(&tmp, sizeof(struct str_read), 1, ref_allel_reads);
+ }
+ /*else { // we store the reads that support the SVs in IPrinter when writing out the SVs.
+ for (size_t i = 0; i < split_events.size(); i++) {
+ tmp.chr = ref[split_events[i].RefID].RefName;
+ tmp.start = split_events[i].pos;
+ tmp.length = split_events[i].length;
+ fwrite(&tmp, sizeof(struct str_read), 1, alt_allel_reads);
+ }
+ if (split_events.empty()) { //splits store the primary aln as well!
+ tmp.chr = ref[tmp_aln->getRefID()].RefName;
+ tmp.start = tmp_aln->getPosition();
+ tmp.length = tmp_aln->getRefLength();
+ fwrite(&tmp, sizeof(struct str_read), 1, alt_allel_reads);
+ }
+ }*/
+
+ // clock_t begin = clock();
+ //maybe just store the extreme intervals for coverage -> If the cov doubled within Xbp or were the coverage is 0.
+ if (!aln_event.empty()) {
+ add_events(tmp_aln, aln_event, 0, ref_space, bst, root, num_reads);
+ }
+ // Parameter::Instance()->meassure_time(begin, " add event ");
+ //cout<<"event: "<<aln_event.size()<<endl;
+ // begin = clock();
+ if (!split_events.empty()) {
+ add_splits(tmp_aln, split_events, 1, ref, bst, root, num_reads);
- int cov = 0; //sweep->get_num_reads();
+ }
+ // Parameter::Instance()->meassure_time(begin, " add split ");
- //maybe just store the extreme intervals for coverage -> If the cov doubled within Xbp or were the coverage is 0.
- add_events(tmp_aln, cigar_event, 0, ref_space, bst, root, cov, tmp_aln->getQueryBases());
- add_events(tmp_aln, md_event, 1, ref_space, bst, root, cov, tmp_aln->getQueryBases());
- add_splits(tmp_aln, split_events, 2, ref, bst, root, cov, tmp_aln->getQueryBases());
}
}
mapped_file->parseReadFast(Parameter::Instance()->min_mq, tmp_aln);
+ num_reads++;
+ if (num_reads % 10000 == 0) {
+ curr = tmp_aln->getName();
+ cout << "\t\t# Processed reads: " << num_reads << endl;
+ if (curr.size() == prev.size() && strcmp(prev.c_str(), curr.c_str()) == 0) {
+ std::cerr << "Read occurred twice with primary alignment marked. Possibly no eof recognized in bam file." << std::endl;
+ break;
+ }
+ prev = curr;
+ }
}
- //filter and copy results:
+
+// cout << "Print tree:" << std::endl;
+// bst.inorder(root);
+//filter and copy results:
std::cout << "Finalizing .." << std::endl;
std::vector<Breakpoint *> points;
- clarify(points);
bst.get_breakpoints(root, points);
for (int i = 0; i < points.size(); i++) {
if (should_be_stored(points[i])) {
if (points[i]->get_SVtype() & TRA) {
final.insert(points[i], root_final);
} else {
+
printer->printSV(points[i]);
}
}
}
bst.makeempty(root);
-
+ if (Parameter::Instance()->genotype) {
+ fclose(ref_allel_reads);
+ }
// sweep->finalyze();
points.clear();
final.get_breakpoints(root_final, points);
- //std::cout<<"Points: "<<points.size()<<endl;
- //clarify(points);
- for(size_t i =0;i<points.size();i++){
+ for (size_t i = 0; i < points.size(); i++) {
if (points[i]->get_SVtype() & TRA) {
points[i]->calc_support();
points[i]->predict_SV();
@@ -166,26 +251,26 @@ void detect_breakpoints(std::string read_filename, IPrinter *& printer) {
}
}
-void add_events(Alignment * tmp, std::vector<str_event> events, short type, long ref_space, IntervallTree & bst, TNode *&root, int cov, std::string read_seq) {
+void add_events(Alignment *& tmp, std::vector<str_event> events, short type, long ref_space, IntervallTree & bst, TNode *&root, long read_id) {
+
bool flag = (strcmp(tmp->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
for (size_t i = 0; i < events.size(); i++) {
position_str svs;
- //position_str stop;
read_str read;
- //read.name = tmp->getName();
- read.type = type;
- read.SV = 0;
- //start.support.push_back(read); //not very nice!
- //stop.support.push_back(read);
+ read.type = 0;
+ read.SV = events[i].type;
+
if (flag) {
- std::cout << tmp->getName() << " " << tmp->getRefID() << " " << events[i].pos << " " << abs(events[i].length) << std::endl;
+ std::cout << "ADD EVENT " << tmp->getName() << " " << tmp->getRefID() << " " << events[i].pos << " " << abs(events[i].length) << std::endl;
}
svs.start.min_pos = (long) events[i].pos;
svs.start.min_pos += ref_space;
svs.stop.max_pos = svs.start.min_pos;
- if (events[i].length > 0) { //length ==- length for insertions!
+
+ if (!(events[i].type & INS)) { //for all events but not INS!
svs.stop.max_pos += events[i].length;
}
+
if (tmp->getStrand()) {
read.strand.first = (tmp->getStrand());
read.strand.second = !(tmp->getStrand());
@@ -195,17 +280,8 @@ void add_events(Alignment * tmp, std::vector<str_event> events, short type, long
}
// start.support[0].read_start.min = events[i].read_pos;
- if (type == 0 && events[i].length < 0) {
- read.SV |= INS; //insertion
- } else if (type == 0) {
- read.SV |= DEL; //deletion
- } else {
- read.SV |= DEL;
- read.SV |= INV;
- }
-
if (flag) {
- std::cout << tmp->getName() << " " << tmp->getRefID() << " " << svs.start.min_pos - ref_space << " " << svs.stop.max_pos - ref_space << std::endl;
+ std::cout << tmp->getName() << " " << tmp->getRefID() << " " << svs.start.min_pos << " " << svs.stop.max_pos << " " << svs.stop.max_pos - svs.start.min_pos << std::endl;
}
if (svs.start.min_pos > svs.stop.max_pos) {
@@ -220,19 +296,13 @@ void add_events(Alignment * tmp, std::vector<str_event> events, short type, long
svs.start.max_pos = svs.start.min_pos;
svs.stop.min_pos = svs.stop.max_pos;
- if (svs.start.min_pos > svs.stop.max_pos) {
- //maybe we have to invert the directions???
+ if (svs.start.min_pos > svs.stop.max_pos) { //incase they are inverted
svs_breakpoint_str pos = svs.start;
svs.start = svs.stop;
svs.stop = pos;
-
pair<bool, bool> tmp = read.strand;
-
read.strand.first = tmp.second;
read.strand.second = tmp.first;
-
- //read.strand.first = !tmp.first;
- //read.strand.second = !tmp.second;
}
//TODO: we might not need this:
@@ -243,51 +313,38 @@ void add_events(Alignment * tmp, std::vector<str_event> events, short type, long
read.coordinates.first = svs.start.min_pos;
read.coordinates.second = svs.stop.max_pos;
}
+ read.id = read_id;
svs.support[tmp->getName()] = read;
- Breakpoint * point = new Breakpoint(svs, cov, std::abs(events[i].length));
+ Breakpoint * point = new Breakpoint(svs, events[i].length);
bst.insert(point, root);
}
}
-void add_splits(Alignment * tmp, std::vector<aln_str> events, short type, RefVector ref, IntervallTree & bst, TNode *&root, int cov, std::string read_seq) {
- /* bool flag = false;
- if (Parameter::Instance()->overlaps(ref[tmp->getRefID()].RefName,
- tmp->getPosition(), tmp->getPosition() + tmp->getRefLength())) {
- Parameter::Instance()->read_name = tmp->getName();
- flag = true;
- }
- */
-
+void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVector ref, IntervallTree & bst, TNode *&root, long read_id) {
bool flag = (strcmp(tmp->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
- for (size_t i = 1; i < events.size() && events.size() < Parameter::Instance()->max_splits; i++) {
- if (flag) {
- std::cout << "Genome pos: " << tmp->getName() << " ";
- if (events[i - 1].strand) {
- std::cout << "+";
- } else {
- std::cout << "-";
- }
- std::cout << events[i - 1].pos << " " << events[i - 1].pos + events[i - 1].length << " p2: ";
-
- if (events[i - 1].strand) {
- std::cout << "+";
+ //flag = true;
+ if (false) {
+ cout << "SPLIT: " << std::endl;
+ for (size_t i = 0; i < events.size(); i++) {
+ std::cout << events[i].pos << " stop: " << events[i].pos + events[i].length << " " << events[i].RefID << " READ: " << events[i].read_pos_start << " " << events[i].read_pos_stop;
+ if (events[i].strand) {
+ cout << " +" << endl;
} else {
- std::cout << "-";
+ cout << " -" << endl;
}
- std::cout << events[i].pos << " " << events[i].pos + events[i].length << " Ref: " << events[i - 1].RefID << " " << events[i].RefID << std::endl;
}
+ }
+
+ for (size_t i = 1; i < events.size(); i++) {
position_str svs;
//position_str stop;
read_str read;
//read.name = tmp->getName();
- read.type = 2;
+ read.type = type;
read.SV = 0;
//stop.support.push_back(read);
- //they mimic paired end sequencing:
- if (events[i].RefID == events[i - 1].RefID) {
-
- //TODO: changed because of test:
- if (events[i - 1].strand == events[i].strand) {
+ if (events[i].RefID == events[i - 1].RefID) { //IF different chr -> tra
+ if (events[i - 1].strand == events[i].strand) { //IF same strand -> del/ins/dup
if (events[i - 1].strand) {
read.strand.first = events[i - 1].strand;
read.strand.second = !events[i].strand;
@@ -295,41 +352,78 @@ void add_splits(Alignment * tmp, std::vector<aln_str> events, short type, RefVec
read.strand.first = !events[i - 1].strand;
read.strand.second = events[i].strand;
}
-
- svs.read_start = events[i - 1].read_pos_start + events[i - 1].length;
+ int len1 = 0;
+ int len2 = 0;
+ svs.read_start = events[i - 1].read_pos_stop; // (short) events[i - 1].read_pos_start + (short) events[i - 1].length;
svs.read_stop = events[i].read_pos_start;
if (events[i - 1].strand) {
+ len1 = events[i - 1].pos;
+ len2 = events[i].pos - events[i].length;
svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
} else {
+ len1 = events[i].pos;
+ len2 = events[i - 1].pos - events[i - 1].length;
svs.start.min_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
svs.stop.max_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
}
- if ((svs.start.min_pos - svs.stop.max_pos) > 100) {
- read.SV |= DUP;
- //TODO ADDED &&(svs.read_stop - svs.read_start) > (Parameter::Instance()->min_cigar_event * 2)
- } else if (abs(svs.stop.max_pos - svs.start.min_pos) + (Parameter::Instance()->min_cigar_event * 2) < (svs.read_stop - svs.read_start) && (svs.read_stop - svs.read_start) > (Parameter::Instance()->min_cigar_event * 2)) {
+ if (flag) {
+ cout << "Debug: SV_Size: " << (svs.start.min_pos - svs.stop.max_pos) << " Ref_start: " << svs.start.min_pos - get_ref_lengths(events[i].RefID, ref) << " Ref_stop: " << svs.stop.max_pos - get_ref_lengths(events[i].RefID, ref) << " readstart: " << svs.read_start << " readstop: "
+ << svs.read_stop << "readstart+len: " << len1 << " readstop+len: " << len2 << endl;
+ }
+
+ if ((svs.stop.max_pos - svs.start.min_pos) > 0 && ((svs.stop.max_pos - svs.start.min_pos) + (Parameter::Instance()->min_cigar_event) < (svs.read_stop - svs.read_start) && (svs.read_stop - svs.read_start) > (Parameter::Instance()->min_cigar_event * 2))) {
+ if (flag) {
+ cout << "INS: " << endl;
+ }
+ //read.SV = 'n'; //TODO redefine criteria!
read.SV |= INS;
- } else if (abs(svs.stop.max_pos - svs.start.min_pos) > (svs.read_stop - svs.read_start) + (Parameter::Instance()->min_cigar_event * 2)) {
+ /* if (events[i - 1].strand) { //TODO check f
+ svs.start.min_pos -=events[i - 1].length;
+ }else{
+ svs.start.min_pos -=events[i].length;
+ }*/
+ // cout<<"Split INS: "<<events[i - 1].length<<" "<<(svs.stop.max_pos - svs.start.min_pos)<<" "<<svs.read_stop - svs.read_start<<endl;
+ } else if ((svs.start.min_pos - svs.stop.max_pos) * -1 > (svs.read_stop - svs.read_start) + (Parameter::Instance()->min_cigar_event)) {
read.SV |= DEL;
+ if (flag) {
+ cout << "DEL" << endl;
+ }
+ } else if ((svs.start.min_pos - svs.stop.max_pos) > Parameter::Instance()->min_cigar_event) {
+ read.SV |= DUP;
+ if (flag) {
+ cout << "DUP: " << endl;
+ }
+ //TODO ADDED &&(svs.read_stop - svs.read_start) > (Parameter::Instance()->min_cigar_event)
} else {
+ if (flag) {
+ cout << "N" << endl;
+ }
read.SV = 'n';
}
- } else { // if first part of read is in a different direction as the second part-> INV
+ } else { // if first part of read is in a different direction as the second part-> INV
+ if (flag) {
+ cout << "INV" << endl;
+ }
+ /*if ((svs.start.min_pos - svs.stop.max_pos) > Parameter::Instance()->min_cigar_event) {
+ //TODO =>DUP!
+ std::cout << " SPLIT INV DUP!" << std::endl;
+ }else{
+ std::cout << "SPLIT INV"<<std::endl;
+ }*/
read.strand.first = events[i - 1].strand;
read.strand.second = !events[i].strand;
read.SV |= INV;
if (events[i - 1].strand) {
- //std::cout<<events[i].pos<<"\t"<<events[i].RefID<<"\t"<<get_ref_lengths(events[i].RefID, ref)<<endl;
svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
- svs.stop.max_pos = events[i].pos + events[i].length + get_ref_lengths(events[i].RefID, ref);
+ svs.stop.max_pos = (events[i].pos + events[i].length) + get_ref_lengths(events[i].RefID, ref);
} else {
-
svs.start.min_pos = events[i - 1].pos + get_ref_lengths(events[i - 1].RefID, ref);
svs.stop.max_pos = events[i].pos + get_ref_lengths(events[i].RefID, ref);
}
}
+
} else { //if not on the same chr-> TRA
read.strand.first = events[i - 1].strand;
read.strand.second = !events[i].strand;
@@ -354,7 +448,7 @@ void add_splits(Alignment * tmp, std::vector<aln_str> events, short type, RefVec
}
if (flag) {
- std::cout << tmp->getName() << " start: " << svs.start.min_pos << " stop: " << svs.stop.max_pos;
+ std::cout << "SPLIT: " << TRANS_type(read.SV) << " start: " << svs.start.min_pos << " stop: " << svs.stop.max_pos; //- get_ref_lengths(events[i].RefID, ref);
if (events[i - 1].strand) {
std::cout << " +";
} else {
@@ -365,7 +459,7 @@ void add_splits(Alignment * tmp, std::vector<aln_str> events, short type, RefVec
} else {
std::cout << " -";
}
- std::cout << std::endl;
+ std::cout << " " << tmp->getName() << std::endl;
}
if (read.SV != 'n') {
//std::cout<<"split"<<std::endl;
@@ -382,9 +476,6 @@ void add_splits(Alignment * tmp, std::vector<aln_str> events, short type, RefVec
read.strand.first = tmp.second;
read.strand.second = tmp.first;
-
- //read.strand.first = !tmp.first;
- //read.strand.second = !tmp.second;
}
//TODO: we might not need this:
@@ -396,9 +487,13 @@ void add_splits(Alignment * tmp, std::vector<aln_str> events, short type, RefVec
read.coordinates.second = svs.stop.max_pos;
}
+ //pool out?
+ read.id = read_id;
svs.support[tmp->getName()] = read;
- Breakpoint * point = new Breakpoint(svs, cov, events[i].length);
+ Breakpoint * point = new Breakpoint(svs, events[i].length);
+
bst.insert(point, root);
+ // breakpoints_tmp1.push_back(pair<position_str,int> (svs, events[i].length)); TODO we could create 2 loops: 1st: collect evidence 2nd: further interpretation of complex Var
}
}
}
@@ -411,6 +506,7 @@ void clarify(std::vector<Breakpoint *> & points) {
}
void estimate_parameters(std::string read_filename) {
+ cout << "Estimating parameter..." << endl;
BamParser * mapped_file = 0;
RefVector ref;
if (read_filename.find("bam") != string::npos) {
@@ -426,40 +522,77 @@ void estimate_parameters(std::string read_filename) {
double avg_score = 0;
double avg_mis = 0;
double avg_indel = 0;
-
- while (!tmp_aln->getSequence().first.empty() && num < 1000) {
-
- if ((tmp_aln->getAlignment()->IsPrimaryAlignment()) && (!(tmp_aln->getAlignment()->AlignmentFlag & 0x800) && tmp_aln->get_is_save())) {
-
- //get score ratio
- double score = tmp_aln->get_scrore_ratio();
- if (score != -1) {
- avg_score += score;
- } else {
- avg_score += avg_score / num;
+ double avg_diffs_perwindow = 0;
+ vector<int> mis_per_window; //histogram over #differences
+ vector<int> scores;
+ std::string curr, prev = "";
+ double avg_dist=0;
+ while (!tmp_aln->getQueryBases().empty() && num < 1000) { //1000
+
+ //I want to know avg. distance between events + #events within 100bp.
+
+ if (rand() % 100 < 10 && ((tmp_aln->getAlignment()->IsPrimaryAlignment()) && (!(tmp_aln->getAlignment()->AlignmentFlag & 0x800)))) { //}&& tmp_aln->get_is_save()))) {
+ //1. check differences in window => min_treshold for scanning!
+ //2. get score ration without checking before hand! (above if!)
+ double dist=0;
+ vector<int> tmp = tmp_aln->get_avg_diff(dist);
+ // std::cout<<"tmp: "<<dist<<std::endl;
+ avg_dist+=dist;
+ for (size_t i = 0; i < tmp.size(); i++) {
+ while (tmp[i] + 1 > mis_per_window.size()) { //adjust length
+ mis_per_window.push_back(0);
+ }
+ mis_per_window[tmp[i]]++;
}
- //cout<<"Para:\t"<<score;
- //get avg mismatches
- std::string md = tmp_aln->get_md();
- if (!md.empty()) {
- avg_mis += tmp_aln->get_num_mismatches(md);
- //cout<<"\t"<<tmp_aln->get_num_mismatches(md);
+ //avg_diffs_perwindow+=tmp_aln->get_avg_diff();
+ //get score ratio
+ double score = round(tmp_aln->get_scrore_ratio());
+ while (score + 1 > scores.size()) {
+ scores.push_back(0);
}
- //cigar threshold: (without 1!)
- avg_indel += tmp_aln->get_avg_indel_length_Cigar();
- //cout<<"\t"<<tmp_aln->get_avg_indel_length_Cigar()<<endl;
+ scores[score]++;
num++;
}
mapped_file->parseReadFast(Parameter::Instance()->min_mq, tmp_aln);
+ curr = tmp_aln->getName();
+ if (curr.size() == prev.size() && strcmp(prev.c_str(), curr.c_str()) == 0) {
+ std::cerr << "Read occurred twice with primary alignment marked. Possibly no eof recognized in bam file." << std::endl;
+ break;
+ }
+ prev = curr;
+ }
+ avg_dist=avg_dist/num;
+ std::cout<<"Dist: "<<avg_dist<<std::endl;
+ Parameter::Instance()->avg_distance=avg_dist/2;
+ vector<int> nums;
+ size_t pos = 0;
+ Parameter::Instance()->window_thresh = 25;
+ if (!mis_per_window.empty()) {
+ for (size_t i = 0; i < mis_per_window.size(); i++) {
+ if (mis_per_window[i] != 0) {
+ // std::cout << i << ": " << mis_per_window[i] << std::endl;
+ }
+ for (size_t j = 0; j < mis_per_window[i]; j++) {
+ nums.push_back(i);
+ }
+ }
+ pos = nums.size() * 0.95; //the highest 5% cutoff
+ if (pos <= nums.size()) {
+ std::cout << "thres: " << nums[pos] << " " << 25 << std::endl;
+ Parameter::Instance()->window_thresh = std::max(25, nums[pos]); //just in case we have too clean data! :)
+ }
+ nums.clear();
}
- std::cout << avg_indel / num << std::endl;
-
- Parameter::Instance()->min_num_mismatches = 0.3; //(avg_mis / num) * 0.3; //previously: 0.3
- Parameter::Instance()->min_cigar_event = 40; //(avg_indel / num) * 20; //previously: 20
- Parameter::Instance()->score_treshold = 2; //(avg_score / num) / 2; //previously: 2 //2
-
- std::cout << "score: " << Parameter::Instance()->score_treshold << std::endl;
- std::cout << "md: " << Parameter::Instance()->min_num_mismatches << std::endl;
- std::cout << "indel: " << Parameter::Instance()->min_cigar_event << std::endl;
+ for (size_t i = 0; i < scores.size(); i++) {
+ for (size_t j = 0; j < scores[i]; j++) {
+ nums.push_back(i);
+ }
+ }
+ pos = nums.size() * 0.05; //the lowest 5% cuttoff
+ Parameter::Instance()->score_treshold = 2; //nums[pos]; //prev=2
+ std::cout << "\tMax diff in window: " << Parameter::Instance()->window_thresh << std::endl;
+ std::cout << "\tMin score ratio: " << Parameter::Instance()->score_treshold << std::endl;
+ exit ;
}
+
diff --git a/src/sub/Detect_Breakpoints.h b/src/sub/Detect_Breakpoints.h
index d2d50db..acb003c 100644
--- a/src/sub/Detect_Breakpoints.h
+++ b/src/sub/Detect_Breakpoints.h
@@ -16,6 +16,7 @@
#include "../tree/TNode.h"
#include "../Paramer.h"
#include "../print/IPrinter.h"
+
#include <iostream>
#include <omp.h>
@@ -23,7 +24,12 @@ void clarify(std::vector<Breakpoint *> & points);
void detect_breakpoints(std::string filename, IPrinter *& printer);
//void screen_for_events(Node * list,IntervallTree & bst ,TNode *&root, int cov, int lowMQ_cov,RefVector ref);
bool screen_for_events(Alignment * tmp, IntervallTree & bst, TNode *&root, RefVector ref, int cov);
-void add_events(Alignment * tmp, std::vector<str_event> events, short type, long ref_space, IntervallTree & bst, TNode *&root, int cov,std::string read_seq);
-void add_splits(Alignment * tmp, std::vector<aln_str> events, short type, RefVector ref, IntervallTree & bst, TNode *&root, int cov,std::string read_seq);
+void add_events(Alignment *& tmp, std::vector<str_event> events, short type, long ref_space, IntervallTree & bst, TNode *&root,long read_id);
+void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVector ref, IntervallTree & bst, TNode *&root,long read_id);
void estimate_parameters(std::string read_filename);
+
+
+std::string TRANS_type(char type);
+
+
#endif /* SUB_DETECT_BREAKPOINTS_H_ */
diff --git a/src/sub/Detect_Breapoints.d b/src/sub/Detect_Breapoints.d
new file mode 100644
index 0000000..642c0fe
--- /dev/null
+++ b/src/sub/Detect_Breapoints.d
@@ -0,0 +1,82 @@
+src/sub/Detect_Breapoints.d: ../src/sub/Detect_Breapoints.cpp \
+ ../src/sub/Detect_Breakpoints.h ../src/sub/../BamParser.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/sub/../Alignment.h ../src/sub/../Paramer.h \
+ ../src/sub/../Parser.h ../src/sub/../plane-sweep/Plane-sweep.h \
+ ../src/sub/../plane-sweep/IContainer.h \
+ ../src/sub/../plane-sweep/Node.h ../src/sub/../plane-sweep/MyList.h \
+ ../src/sub/../plane-sweep/MyHeap.h ../src/sub/../tree/IntervallTree.h \
+ ../src/sub/../tree/TNode.h ../src/sub/../tree/../sub/Breakpoint.h
+
+../src/sub/Detect_Breakpoints.h:
+
+../src/sub/../BamParser.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/sub/../Alignment.h:
+
+../src/sub/../Paramer.h:
+
+../src/sub/../Parser.h:
+
+../src/sub/../plane-sweep/Plane-sweep.h:
+
+../src/sub/../plane-sweep/IContainer.h:
+
+../src/sub/../plane-sweep/Node.h:
+
+../src/sub/../plane-sweep/MyList.h:
+
+../src/sub/../plane-sweep/MyHeap.h:
+
+../src/sub/../tree/IntervallTree.h:
+
+../src/sub/../tree/TNode.h:
+
+../src/sub/../tree/../sub/Breakpoint.h:
diff --git a/src/sub/Detect_Breapoints.o b/src/sub/Detect_Breapoints.o
new file mode 100644
index 0000000..90caaa5
Binary files /dev/null and b/src/sub/Detect_Breapoints.o differ
diff --git a/src/sub/subdir.mk b/src/sub/subdir.mk
new file mode 100644
index 0000000..52e7d27
--- /dev/null
+++ b/src/sub/subdir.mk
@@ -0,0 +1,24 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables
+CPP_SRCS += \
+../src/sub/Detect_Breapoints.cpp
+
+OBJS += \
+./src/sub/Detect_Breapoints.o
+
+CPP_DEPS += \
+./src/sub/Detect_Breapoints.d
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/sub/%.o: ../src/sub/%.cpp
+ @echo 'Building file: $<'
+ @echo 'Invoking: Cross G++ Compiler'
+ g++ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include -O3 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
+
diff --git a/src/subdir.mk b/src/subdir.mk
new file mode 100644
index 0000000..628d7e8
--- /dev/null
+++ b/src/subdir.mk
@@ -0,0 +1,33 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables
+CPP_SRCS += \
+../src/Alignment.cpp \
+../src/BamParser.cpp \
+../src/Parser.cpp \
+../src/Sniffles.cpp
+
+OBJS += \
+./src/Alignment.o \
+./src/BamParser.o \
+./src/Parser.o \
+./src/Sniffles.o
+
+CPP_DEPS += \
+./src/Alignment.d \
+./src/BamParser.d \
+./src/Parser.d \
+./src/Sniffles.d
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/%.o: ../src/%.cpp
+ @echo 'Building file: $<'
+ @echo 'Invoking: Cross G++ Compiler'
+ g++ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include -O3 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
+
diff --git a/src/test b/src/test
new file mode 100644
index 0000000..e69de29
diff --git a/src/tree/Breakpoint_Tree.cpp b/src/tree/Breakpoint_Tree.cpp
new file mode 100644
index 0000000..54488ad
--- /dev/null
+++ b/src/tree/Breakpoint_Tree.cpp
@@ -0,0 +1,257 @@
+/*
+ * Breakpoint_Tree.cpp
+ *
+ * Created on: Mar 28, 2016
+ * Author: fsedlaze
+ */
+
+#include "Breakpoint_Tree.h"
+void Breakpoint_Tree::find(int position, std::string chr, breakpoint_node *par, breakpoint_node *&loc) {
+ if (par == NULL) { //not found
+ loc = NULL;
+ par = NULL;
+ return;
+ }
+ if (position == par->position && strcmp(chr.c_str(), par->chr.c_str()) == 0) { //found
+ loc = par;
+ par = NULL;
+ return;
+ }
+ //search goes on:
+ if (position < par->position) {
+ find(position, chr, par->left, loc);
+ } else {
+ find(position, chr, par->right, loc);
+ }
+}
+
+void Breakpoint_Tree::overalps(int start, int stop, std::string chr, breakpoint_node *par, bool SV_support) {
+ if (par == NULL) { //not found
+
+ par = NULL;
+ return;
+ }
+ /*std::string chr2="II";
+ int pos=275800;
+ if ((pos+100 > start && pos-100 < stop) && strcmp(chr.c_str(), chr2.c_str()) == 0) { //found
+ std::cout<<"hit: "<<start<<std::endl;
+ }*/
+ if ((par->position + 100 > start && par->position - 100 < stop) && strcmp(chr.c_str(), par->chr.c_str()) == 0) { //found
+ // if (SV_support) { Maybe for later!
+ //par->SV_support++;
+ //} else {
+ par->ref_support++;
+ //}
+ //par = NULL;
+ //return; //this does not really work for close/phased SVs!
+ }
+ //search goes on:
+ if (start < par->position) {
+ overalps(start, stop, chr, par->left, SV_support);
+ } else {
+ overalps(start, stop, chr, par->right, SV_support);
+ }
+}
+
+/*
+ * Inserting Element into the Tree
+ */
+void Breakpoint_Tree::insert(breakpoint_node *&tree, std::string chr, int position) {
+ if (tree == NULL) {
+ tree = new breakpoint_node;
+ tree->position = position;
+ tree->ref_support = 0;
+ tree->chr = chr;
+ tree->left = NULL;
+ tree->right = NULL;
+ } else if (tree->position > position) {
+ insert(tree->left, chr, position);
+ } else if (tree->position < position) {
+ insert(tree->right, chr, position);
+ } else if (strcmp(chr.c_str(), tree->chr.c_str()) == 0) { // found element -> already exist!
+ //std::cerr << "Element exists!" << std::endl;
+ //TODO we should use this information to assess the reliability of this call!
+ } else {
+ insert(tree->left, chr, position); //think about that!
+ }
+}
+
+int Breakpoint_Tree::get_ref(breakpoint_node *&tree, std::string chr, int position) {
+ if (tree == NULL) {
+ return -1;
+ }
+ if (tree->position > position) {
+ return get_ref(tree->left, chr, position);
+ } else if (tree->position < position) {
+ return get_ref(tree->right, chr, position);
+ } else if (strcmp(chr.c_str(), tree->chr.c_str()) == 0) { // found element
+ return tree->ref_support;
+ } else{
+ return get_ref(tree->left, chr, position); //just in case.
+ }
+}
+/*
+
+ * Delete Element from the tree
+
+ */
+
+void Breakpoint_Tree::del(int position, std::string chr) {
+ breakpoint_node *parent, *location;
+
+ if (parent == NULL) {
+ std::cout << "Tree empty" << std::endl;
+ return;
+ }
+
+ find(position, chr, parent, location);
+ if (location == NULL) {
+ std::cout << "Item not present in tree" << std::endl;
+ return;
+ }
+
+ if (location->left == NULL && location->right == NULL) {
+ case_a(parent, location);
+ }
+ if (location->left != NULL && location->right == NULL) {
+ case_b(parent, location);
+ }
+ if (location->left == NULL && location->right != NULL) {
+ case_b(parent, location);
+ }
+ if (location->left != NULL && location->right != NULL) {
+ case_c(parent, location);
+ }
+ delete location;
+}
+
+/*
+
+ * Case A
+
+ */
+
+void Breakpoint_Tree::case_a(breakpoint_node *par, breakpoint_node *loc) {
+ if (par == NULL) {
+ loc = NULL;
+ } else {
+ if (loc == par->left) {
+ par->left = NULL;
+ } else {
+ par->right = NULL;
+ }
+ }
+}
+
+/*
+
+ * Case B
+
+ */
+
+void Breakpoint_Tree::case_b(breakpoint_node *par, breakpoint_node *loc) {
+ breakpoint_node *child;
+ if (loc->left != NULL) {
+ child = loc->left;
+ } else {
+ child = loc->right;
+ }
+ if (par == NULL) {
+ loc = child;
+ } else {
+ if (loc == par->left) {
+ par->left = child;
+ } else {
+ par->right = child;
+ }
+ }
+}
+
+/*
+
+ * Case C
+
+ */
+
+void Breakpoint_Tree::case_c(breakpoint_node *par, breakpoint_node *loc) {
+ breakpoint_node *ptr, *ptrsave, *suc, *parsuc;
+ ptrsave = loc;
+ ptr = loc->right;
+ while (ptr->left != NULL) {
+ ptrsave = ptr;
+ ptr = ptr->left;
+ }
+ suc = ptr;
+ parsuc = ptrsave;
+ if (suc->left == NULL && suc->right == NULL) {
+ case_a(parsuc, suc);
+ } else {
+ case_b(parsuc, suc);
+ }
+
+ if (par == NULL) {
+ loc = suc;
+ } else {
+ if (loc == par->left) {
+ par->left = suc;
+ } else {
+ par->right = suc;
+ }
+ }
+ suc->left = loc->left;
+ suc->right = loc->right;
+}
+
+/*
+
+ * Pre Order Traversal
+
+ */
+
+void Breakpoint_Tree::preorder(breakpoint_node *ptr) {
+ if (ptr == NULL) {
+ std::cout << "Tree is empty" << std::endl;
+ return;
+ }
+ if (ptr != NULL) {
+ std::cout << ptr->position << " ";
+ preorder(ptr->left);
+ preorder(ptr->right);
+ }
+}
+
+/*
+
+ * In Order Traversal
+
+ */
+
+void Breakpoint_Tree::inorder(breakpoint_node *ptr) {
+ if (ptr == NULL) {
+ std::cout << "Tree is empty" << std::endl;
+ return;
+ }
+ if (ptr != NULL) {
+ inorder(ptr->left);
+ std::cout << ptr->chr << " " << ptr->position << " " << ptr->ref_support << std::endl;
+ inorder(ptr->right);
+ }
+}
+
+/*
+
+ * Postorder Traversal
+
+ */
+
+void Breakpoint_Tree::postorder(breakpoint_node *ptr) {
+ if (ptr == NULL) {
+ return;
+ }
+ if (ptr != NULL) {
+ postorder(ptr->left);
+ postorder(ptr->right);
+ std::cout << ptr->position << " ";
+ }
+}
+
diff --git a/src/tree/Breakpoint_Tree.h b/src/tree/Breakpoint_Tree.h
new file mode 100644
index 0000000..db85d2a
--- /dev/null
+++ b/src/tree/Breakpoint_Tree.h
@@ -0,0 +1,50 @@
+/*
+ * Breakpoint_Tree.h
+ *
+ * Created on: Mar 28, 2016
+ * Author: fsedlaze
+ */
+
+#ifndef TREE_BREAKPOINT_TREE_H_
+#define TREE_BREAKPOINT_TREE_H_
+#include <vector>
+#include <cstddef>
+#include <iostream>
+#include <string>
+#include <string.h>
+struct breakpoint_node {
+ std::string chr;
+ int position; // value to store!
+ int ref_support;
+ breakpoint_node *left;
+ breakpoint_node *right;
+};
+
+class Breakpoint_Tree {
+private:
+
+public:
+ Breakpoint_Tree() {
+ }
+ ~Breakpoint_Tree(){
+ }
+ void find(int position,std::string chr, breakpoint_node *par, breakpoint_node *&loc);
+ void insert(breakpoint_node *&tree, std::string chr,int position);
+ void del(int position,std::string chr);
+ void case_a(breakpoint_node *par, breakpoint_node *loc);
+ void case_b(breakpoint_node *par, breakpoint_node *loc);
+ void case_c(breakpoint_node *par, breakpoint_node *loc);
+ void preorder(breakpoint_node *ptr);
+ void inorder(breakpoint_node *ptr);
+ void postorder(breakpoint_node *ptr);
+ void display(breakpoint_node *ptr, int);
+ void get_nodes(breakpoint_node *ptr, std::vector<int> & nodes);
+ void overalps(int start,int stop,std::string chr, breakpoint_node *par, bool SV_support);
+ int get_ref(breakpoint_node *&tree, std::string chr, int position);
+};
+
+
+
+
+
+#endif /* TREE_BREAKPOINT_TREE_H_ */
diff --git a/src/tree/IntervallTree.cpp b/src/tree/IntervallTree.cpp
index adf8477..daaff03 100644
--- a/src/tree/IntervallTree.cpp
+++ b/src/tree/IntervallTree.cpp
@@ -9,13 +9,14 @@
// Inserting a node
void IntervallTree::insert(Breakpoint * new_break, TNode *&p) {
+
if (p == NULL) {
p = new TNode(new_break); //TODO: this is going to be a problem!
if (p == NULL) {
std::cout << "Out of Space\n" << std::endl;
}
} else {
- long score = p->get_data()->overlap(new_break);
+ long score = p->get_data()->overlap(new_break); //comparison function
if (score > 0) {
insert(new_break, p->left);
@@ -173,23 +174,18 @@ void IntervallTree::preorder(TNode * p) {
}
void IntervallTree::get_breakpoints(TNode *p, std::vector<Breakpoint *> & points) {
if (p != NULL) {
- get_breakpoints(p->left, points);
- points.push_back(p->get_data());
get_breakpoints(p->right, points);
+ points.push_back(p->get_data());
+ get_breakpoints(p->left, points);
}
}
// Inorder Printing
-void IntervallTree::inorder(TNode * p, TNode * root) {
+void IntervallTree::inorder(TNode * p) {
if (p != NULL) {
- inorder(p->left, root);
- //std::cout << p->get_data()->to_string();
- if (p == root) {
- std::cout << "*\t";
- } else {
- std::cout << "\t";
- }
- inorder(p->right, root);
+ inorder(p->left);
+ std::cout << p->get_data()->to_string()<<endl;
+ inorder(p->right);
}
}
diff --git a/src/tree/IntervallTree.d b/src/tree/IntervallTree.d
new file mode 100644
index 0000000..81c1d79
--- /dev/null
+++ b/src/tree/IntervallTree.d
@@ -0,0 +1,67 @@
+src/tree/IntervallTree.d: ../src/tree/IntervallTree.cpp \
+ ../src/tree/IntervallTree.h ../src/tree/TNode.h \
+ ../src/tree/../sub/Breakpoint.h ../src/tree/../sub/../Paramer.h \
+ ../src/tree/../sub/../BamParser.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/tree/../sub/../Alignment.h ../src/tree/../sub/../Parser.h
+
+../src/tree/IntervallTree.h:
+
+../src/tree/TNode.h:
+
+../src/tree/../sub/Breakpoint.h:
+
+../src/tree/../sub/../Paramer.h:
+
+../src/tree/../sub/../BamParser.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/tree/../sub/../Alignment.h:
+
+../src/tree/../sub/../Parser.h:
diff --git a/src/tree/IntervallTree.h b/src/tree/IntervallTree.h
index 9b5b57f..a33fd16 100644
--- a/src/tree/IntervallTree.h
+++ b/src/tree/IntervallTree.h
@@ -29,7 +29,7 @@ public:
void copy(TNode * &, TNode *&);
TNode * nodecopy(TNode *&);
void preorder(TNode*);
- void inorder(TNode*,TNode * root);
+ void inorder(TNode * p);
void postorder(TNode*);
int bsheight(TNode*);
void get_breakpoints(TNode *p,std::vector<Breakpoint *> & points);
diff --git a/src/tree/IntervallTree.o b/src/tree/IntervallTree.o
new file mode 100644
index 0000000..6d7955d
Binary files /dev/null and b/src/tree/IntervallTree.o differ
diff --git a/src/tree/Intervall_bed.cpp b/src/tree/Intervall_bed.cpp
index 58ae010..8720883 100644
--- a/src/tree/Intervall_bed.cpp
+++ b/src/tree/Intervall_bed.cpp
@@ -17,7 +17,7 @@ void IntervallTree_bed::insert(long start, long stop, Leaf *&p) {
}
} else {
- long score = p->overlap(start, stop);
+ long score = p->overlap(start, stop); //comparison function
if (score > 0) {
insert(start, stop, p->left);
diff --git a/src/tree/Scapegoat.cpp b/src/tree/Scapegoat.cpp
deleted file mode 100644
index 2b6f9dd..0000000
--- a/src/tree/Scapegoat.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * Scapegoat.cpp
- *
- * Created on: Jun 23, 2015
- * Author: fsedlaze
- */
-
-#include "Scapegoat.h"
-
-
diff --git a/src/tree/Scapegoat.d b/src/tree/Scapegoat.d
new file mode 100644
index 0000000..aa6ba2d
--- /dev/null
+++ b/src/tree/Scapegoat.d
@@ -0,0 +1,66 @@
+src/tree/Scapegoat.d: ../src/tree/Scapegoat.cpp ../src/tree/Scapegoat.h \
+ ../src/tree/TNode.h ../src/tree/../sub/Breakpoint.h \
+ ../src/tree/../sub/../Paramer.h ../src/tree/../sub/../BamParser.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h \
+ /Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h \
+ ../src/tree/../sub/../Alignment.h ../src/tree/../sub/../Parser.h
+
+../src/tree/Scapegoat.h:
+
+../src/tree/TNode.h:
+
+../src/tree/../sub/Breakpoint.h:
+
+../src/tree/../sub/../Paramer.h:
+
+../src/tree/../sub/../BamParser.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamMultiReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/api_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/shared/bamtools_global.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamReader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAlignment.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamAux.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamConstants.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamIndex.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamHeader.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgramChain.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamProgram.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroupDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamReadGroup.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequenceDictionary.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/SamSequence.h:
+
+/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/api/BamWriter.h:
+
+../src/tree/../sub/../Alignment.h:
+
+../src/tree/../sub/../Parser.h:
diff --git a/src/tree/Scapegoat.h b/src/tree/Scapegoat.h
deleted file mode 100644
index 94a625c..0000000
--- a/src/tree/Scapegoat.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Scapegoat.h
- *
- * Created on: Jun 23, 2015
- * Author: fsedlaze
- */
-
-#ifndef TREE_SCAPEGOAT_H_
-#define TREE_SCAPEGOAT_H_
-#include "TNode.h"
-/*
-struct TNode {
- TNode * parent;
- TNode * left;
- TNode * right;
- Breakpoint * data;
- int value;
-};*/
-
-/*
-class Scapegoat {
-private:
- TNode * root;
- int n, q;
-public:
- Scapegoat() {
- root = NULL;
- n = 0;
- }
-
- bool isEmpty() {
- return root == NULL;
- }
-
- void makeEmpty() {
- root = NULL;
- n = 0;
- }
-
- int size(TNode *r) {
- if (r == NULL)
- return 0;
- else {
- int l = 1;
- l += size(r->left);
- l += size(r->right);
- return l;
- }
- }
-
- bool search(int value, Breakpoint * point) {
- return search(root,value, point);
- }
- bool search(TNode *r, int value, Breakpoint * point) {
- bool found = false;
- while ((r != NULL) && !found) {
-
- //int score=r->get_data()->overlap(point);
- //if (score>0) {
-
- if(r->get_value() > value){
- r = r->left;
- // } else if (score<0) {
- }else if (r->get_value() < value){
- r = r->right;
- } else {
- found = true;
- break;
- }
- found = search(r, value,point);
- }
- return found;
- }
-
- int size() {
- return n;
- }
- void inorder() {
- inorder(root);
- }
-
- void inorder(TNode *r) {
- if (r != NULL) {
- inorder(r->left);
- std::cout << r->get_value() ;//->get_data()->to_string() << " ";
- if(r==this->root){
- std::cout<<"* ";
- }else{
- std::cout<<" ";
- }
- inorder(r->right);
- }
- }
-
-
- void preorder() {
- preorder(root);
- }
-
- void preorder(TNode *r) {
-
- if (r != NULL) {
- std::cout << r->get_value() <<" ";//get_data()->to_string() << " ";
- preorder(r->left);
- preorder(r->right);
- }
-
- }
-
- void postorder() {
- postorder(root);
- }
-
- void postorder(TNode *r) {
- if (r != NULL) {
- postorder(r->left);
- postorder(r->right);
- std::cout << r->get_value() <<" ";//->get_data()->to_string() << " ";
- }
- }
-
- static int const log32(int q) {
- double const log23 = 2.4663034623764317;
- return (int) ceil(log23 * log(q));
- }
-
- bool add(int value,Breakpoint * point) {
- TNode *u = new TNode(value,point);
- int d = addWithDepth(u);
- if (d > log32(q)) {
- TNode *w = u->parent;
- while (3 * size(w) <= 2 * size(w->parent)) {
- w = w->parent;
- }
- rebuild(w->parent);
- }
- return d >= 0;
- }
-
-
- void rebuild(TNode *u) {
- int ns = size(u);
- TNode *p = u->parent;
- TNode **a = new TNode*[ns];
- packIntoArray(u, a, 0);
- if (p == NULL) {
- root = buildBalanced(a, 0, ns);
- root->parent = NULL;
- } else if (p->right == u) {
- p->right = buildBalanced(a, 0, ns);
- p->right->parent = p;
- } else {
- p->left = buildBalanced(a, 0, ns);
- p->left->parent = p;
- }
- }
-
-
- int packIntoArray(TNode *u, TNode *a[], int i) {
-
- if (u == NULL) {
- return i;
- }
- i = packIntoArray(u->left, a, i);
- a[i++] = u;
- return packIntoArray(u->right, a, i);
- }
-
-
- TNode *buildBalanced(TNode **a, int i, int ns)
-
- {
- if (ns == 0) {
- return NULL;
- }
- int m = ns / 2;
- a[i + m]->left = buildBalanced(a, i, m);
- if (a[i + m]->left != NULL) {
- a[i + m]->left->parent = a[i + m];
- }
- a[i + m]->right = buildBalanced(a, i + m + 1, ns - m - 1);\
- if (a[i + m]->right != NULL) {
- a[i + m]->right->parent = a[i + m];
- }
- return a[i + m];
- }
-
-
- int addWithDepth(TNode *u) {
- TNode *w = root;
- if (w == NULL) {
- root = u;
- n++;
- q++;
- return 0;
- }
- bool done = false;
- int d = 0;
- do {
- //int score=u->get_data()->overlap(w->get_data());
- //if (score>0) {
-
- if(w->get_value() > u->get_value()){
- if (w->left == NULL) {
- w->left = u;
- u->parent = w;
- done = true;
- } else {
- w = w->left;
- }
- //} else if (score<0) {
- }else if(w->get_value() < u->get_value()){
- if (w->right == NULL) {
- w->right = u;
- u->parent = w;
- done = true;
- } else {
-
- w = w->right;
- }
- } else {
- //are equal!
- return -1;
- }
- d++;
- } while (!done);
- n++;
- q++;
- return d;
- }
-
-};
-*/
-#endif /* TREE_SCAPEGOAT_H_ */
diff --git a/src/tree/Scapegoat.o b/src/tree/Scapegoat.o
new file mode 100644
index 0000000..2d0980a
Binary files /dev/null and b/src/tree/Scapegoat.o differ
diff --git a/src/tree/subdir.mk b/src/tree/subdir.mk
new file mode 100644
index 0000000..9e331a8
--- /dev/null
+++ b/src/tree/subdir.mk
@@ -0,0 +1,27 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables
+CPP_SRCS += \
+../src/tree/IntervallTree.cpp \
+../src/tree/Scapegoat.cpp
+
+OBJS += \
+./src/tree/IntervallTree.o \
+./src/tree/Scapegoat.o
+
+CPP_DEPS += \
+./src/tree/IntervallTree.d \
+./src/tree/Scapegoat.d
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/tree/%.o: ../src/tree/%.cpp
+ @echo 'Building file: $<'
+ @echo 'Invoking: Cross G++ Compiler'
+ g++ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/bamtools/include/ -I/Users/fsedlaze/Documents/workspace/Sniffles/lib/tclap-1.2.1/include -O3 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/sniffles.git
More information about the debian-med-commit
mailing list